main
Raw Download raw file
   1// Code generated by command: go run asm.go -out ../sha1cdblock_amd64.s -pkg sha1cd. DO NOT EDIT.
   2
   3//go:build !noasm && gc && amd64
   4
   5#include "textflag.h"
   6
   7// func blockAMD64(dig *digest, p []byte, m1 []uint32, cs [][5]uint32)
   8TEXT ·blockAMD64(SB), NOSPLIT, $64-80
   9	MOVQ dig+0(FP), R8
  10	MOVQ p_base+8(FP), DI
  11	MOVQ p_len+16(FP), DX
  12	SHRQ $+6, DX
  13	SHLQ $+6, DX
  14	LEAQ (DI)(DX*1), SI
  15
  16	// Load h0, h1, h2, h3, h4.
  17	MOVL (R8), AX
  18	MOVL 4(R8), BX
  19	MOVL 8(R8), CX
  20	MOVL 12(R8), DX
  21	MOVL 16(R8), BP
  22
  23	// len(p) >= chunk
  24	CMPQ DI, SI
  25	JEQ  end
  26
  27loop:
  28	// Initialize registers a, b, c, d, e.
  29	MOVL AX, R10
  30	MOVL BX, R11
  31	MOVL CX, R12
  32	MOVL DX, R13
  33	MOVL BP, R14
  34
  35	// ROUND1 (steps 0-15)
  36	// Load cs
  37	MOVQ cs_base+56(FP), R8
  38	MOVL R10, (R8)
  39	MOVL R11, 4(R8)
  40	MOVL R12, 8(R8)
  41	MOVL R13, 12(R8)
  42	MOVL R14, 16(R8)
  43
  44	// ROUND1(0)
  45	// LOAD
  46	MOVL   (DI), R9
  47	BSWAPL R9
  48	MOVL   R9, (SP)
  49
  50	// FUNC1
  51	MOVL R13, R15
  52	XORL R12, R15
  53	ANDL R11, R15
  54	XORL R13, R15
  55
  56	// MIX
  57	ROLL $+30, R11
  58	ADDL R15, R14
  59	MOVL R10, R8
  60	ROLL $+5, R8
  61	LEAL 1518500249(R14)(R9*1), R14
  62	ADDL R8, R14
  63
  64	// Load m1
  65	MOVQ m1_base+32(FP), R8
  66	MOVL (SP), R9
  67	MOVL R9, (R8)
  68
  69	// ROUND1(1)
  70	// LOAD
  71	MOVL   4(DI), R9
  72	BSWAPL R9
  73	MOVL   R9, 4(SP)
  74
  75	// FUNC1
  76	MOVL R12, R15
  77	XORL R11, R15
  78	ANDL R10, R15
  79	XORL R12, R15
  80
  81	// MIX
  82	ROLL $+30, R10
  83	ADDL R15, R13
  84	MOVL R14, R8
  85	ROLL $+5, R8
  86	LEAL 1518500249(R13)(R9*1), R13
  87	ADDL R8, R13
  88
  89	// Load m1
  90	MOVQ m1_base+32(FP), R8
  91	MOVL 4(SP), R9
  92	MOVL R9, 4(R8)
  93
  94	// ROUND1(2)
  95	// LOAD
  96	MOVL   8(DI), R9
  97	BSWAPL R9
  98	MOVL   R9, 8(SP)
  99
 100	// FUNC1
 101	MOVL R11, R15
 102	XORL R10, R15
 103	ANDL R14, R15
 104	XORL R11, R15
 105
 106	// MIX
 107	ROLL $+30, R14
 108	ADDL R15, R12
 109	MOVL R13, R8
 110	ROLL $+5, R8
 111	LEAL 1518500249(R12)(R9*1), R12
 112	ADDL R8, R12
 113
 114	// Load m1
 115	MOVQ m1_base+32(FP), R8
 116	MOVL 8(SP), R9
 117	MOVL R9, 8(R8)
 118
 119	// ROUND1(3)
 120	// LOAD
 121	MOVL   12(DI), R9
 122	BSWAPL R9
 123	MOVL   R9, 12(SP)
 124
 125	// FUNC1
 126	MOVL R10, R15
 127	XORL R14, R15
 128	ANDL R13, R15
 129	XORL R10, R15
 130
 131	// MIX
 132	ROLL $+30, R13
 133	ADDL R15, R11
 134	MOVL R12, R8
 135	ROLL $+5, R8
 136	LEAL 1518500249(R11)(R9*1), R11
 137	ADDL R8, R11
 138
 139	// Load m1
 140	MOVQ m1_base+32(FP), R8
 141	MOVL 12(SP), R9
 142	MOVL R9, 12(R8)
 143
 144	// ROUND1(4)
 145	// LOAD
 146	MOVL   16(DI), R9
 147	BSWAPL R9
 148	MOVL   R9, 16(SP)
 149
 150	// FUNC1
 151	MOVL R14, R15
 152	XORL R13, R15
 153	ANDL R12, R15
 154	XORL R14, R15
 155
 156	// MIX
 157	ROLL $+30, R12
 158	ADDL R15, R10
 159	MOVL R11, R8
 160	ROLL $+5, R8
 161	LEAL 1518500249(R10)(R9*1), R10
 162	ADDL R8, R10
 163
 164	// Load m1
 165	MOVQ m1_base+32(FP), R8
 166	MOVL 16(SP), R9
 167	MOVL R9, 16(R8)
 168
 169	// ROUND1(5)
 170	// LOAD
 171	MOVL   20(DI), R9
 172	BSWAPL R9
 173	MOVL   R9, 20(SP)
 174
 175	// FUNC1
 176	MOVL R13, R15
 177	XORL R12, R15
 178	ANDL R11, R15
 179	XORL R13, R15
 180
 181	// MIX
 182	ROLL $+30, R11
 183	ADDL R15, R14
 184	MOVL R10, R8
 185	ROLL $+5, R8
 186	LEAL 1518500249(R14)(R9*1), R14
 187	ADDL R8, R14
 188
 189	// Load m1
 190	MOVQ m1_base+32(FP), R8
 191	MOVL 20(SP), R9
 192	MOVL R9, 20(R8)
 193
 194	// ROUND1(6)
 195	// LOAD
 196	MOVL   24(DI), R9
 197	BSWAPL R9
 198	MOVL   R9, 24(SP)
 199
 200	// FUNC1
 201	MOVL R12, R15
 202	XORL R11, R15
 203	ANDL R10, R15
 204	XORL R12, R15
 205
 206	// MIX
 207	ROLL $+30, R10
 208	ADDL R15, R13
 209	MOVL R14, R8
 210	ROLL $+5, R8
 211	LEAL 1518500249(R13)(R9*1), R13
 212	ADDL R8, R13
 213
 214	// Load m1
 215	MOVQ m1_base+32(FP), R8
 216	MOVL 24(SP), R9
 217	MOVL R9, 24(R8)
 218
 219	// ROUND1(7)
 220	// LOAD
 221	MOVL   28(DI), R9
 222	BSWAPL R9
 223	MOVL   R9, 28(SP)
 224
 225	// FUNC1
 226	MOVL R11, R15
 227	XORL R10, R15
 228	ANDL R14, R15
 229	XORL R11, R15
 230
 231	// MIX
 232	ROLL $+30, R14
 233	ADDL R15, R12
 234	MOVL R13, R8
 235	ROLL $+5, R8
 236	LEAL 1518500249(R12)(R9*1), R12
 237	ADDL R8, R12
 238
 239	// Load m1
 240	MOVQ m1_base+32(FP), R8
 241	MOVL 28(SP), R9
 242	MOVL R9, 28(R8)
 243
 244	// ROUND1(8)
 245	// LOAD
 246	MOVL   32(DI), R9
 247	BSWAPL R9
 248	MOVL   R9, 32(SP)
 249
 250	// FUNC1
 251	MOVL R10, R15
 252	XORL R14, R15
 253	ANDL R13, R15
 254	XORL R10, R15
 255
 256	// MIX
 257	ROLL $+30, R13
 258	ADDL R15, R11
 259	MOVL R12, R8
 260	ROLL $+5, R8
 261	LEAL 1518500249(R11)(R9*1), R11
 262	ADDL R8, R11
 263
 264	// Load m1
 265	MOVQ m1_base+32(FP), R8
 266	MOVL 32(SP), R9
 267	MOVL R9, 32(R8)
 268
 269	// ROUND1(9)
 270	// LOAD
 271	MOVL   36(DI), R9
 272	BSWAPL R9
 273	MOVL   R9, 36(SP)
 274
 275	// FUNC1
 276	MOVL R14, R15
 277	XORL R13, R15
 278	ANDL R12, R15
 279	XORL R14, R15
 280
 281	// MIX
 282	ROLL $+30, R12
 283	ADDL R15, R10
 284	MOVL R11, R8
 285	ROLL $+5, R8
 286	LEAL 1518500249(R10)(R9*1), R10
 287	ADDL R8, R10
 288
 289	// Load m1
 290	MOVQ m1_base+32(FP), R8
 291	MOVL 36(SP), R9
 292	MOVL R9, 36(R8)
 293
 294	// ROUND1(10)
 295	// LOAD
 296	MOVL   40(DI), R9
 297	BSWAPL R9
 298	MOVL   R9, 40(SP)
 299
 300	// FUNC1
 301	MOVL R13, R15
 302	XORL R12, R15
 303	ANDL R11, R15
 304	XORL R13, R15
 305
 306	// MIX
 307	ROLL $+30, R11
 308	ADDL R15, R14
 309	MOVL R10, R8
 310	ROLL $+5, R8
 311	LEAL 1518500249(R14)(R9*1), R14
 312	ADDL R8, R14
 313
 314	// Load m1
 315	MOVQ m1_base+32(FP), R8
 316	MOVL 40(SP), R9
 317	MOVL R9, 40(R8)
 318
 319	// ROUND1(11)
 320	// LOAD
 321	MOVL   44(DI), R9
 322	BSWAPL R9
 323	MOVL   R9, 44(SP)
 324
 325	// FUNC1
 326	MOVL R12, R15
 327	XORL R11, R15
 328	ANDL R10, R15
 329	XORL R12, R15
 330
 331	// MIX
 332	ROLL $+30, R10
 333	ADDL R15, R13
 334	MOVL R14, R8
 335	ROLL $+5, R8
 336	LEAL 1518500249(R13)(R9*1), R13
 337	ADDL R8, R13
 338
 339	// Load m1
 340	MOVQ m1_base+32(FP), R8
 341	MOVL 44(SP), R9
 342	MOVL R9, 44(R8)
 343
 344	// ROUND1(12)
 345	// LOAD
 346	MOVL   48(DI), R9
 347	BSWAPL R9
 348	MOVL   R9, 48(SP)
 349
 350	// FUNC1
 351	MOVL R11, R15
 352	XORL R10, R15
 353	ANDL R14, R15
 354	XORL R11, R15
 355
 356	// MIX
 357	ROLL $+30, R14
 358	ADDL R15, R12
 359	MOVL R13, R8
 360	ROLL $+5, R8
 361	LEAL 1518500249(R12)(R9*1), R12
 362	ADDL R8, R12
 363
 364	// Load m1
 365	MOVQ m1_base+32(FP), R8
 366	MOVL 48(SP), R9
 367	MOVL R9, 48(R8)
 368
 369	// ROUND1(13)
 370	// LOAD
 371	MOVL   52(DI), R9
 372	BSWAPL R9
 373	MOVL   R9, 52(SP)
 374
 375	// FUNC1
 376	MOVL R10, R15
 377	XORL R14, R15
 378	ANDL R13, R15
 379	XORL R10, R15
 380
 381	// MIX
 382	ROLL $+30, R13
 383	ADDL R15, R11
 384	MOVL R12, R8
 385	ROLL $+5, R8
 386	LEAL 1518500249(R11)(R9*1), R11
 387	ADDL R8, R11
 388
 389	// Load m1
 390	MOVQ m1_base+32(FP), R8
 391	MOVL 52(SP), R9
 392	MOVL R9, 52(R8)
 393
 394	// ROUND1(14)
 395	// LOAD
 396	MOVL   56(DI), R9
 397	BSWAPL R9
 398	MOVL   R9, 56(SP)
 399
 400	// FUNC1
 401	MOVL R14, R15
 402	XORL R13, R15
 403	ANDL R12, R15
 404	XORL R14, R15
 405
 406	// MIX
 407	ROLL $+30, R12
 408	ADDL R15, R10
 409	MOVL R11, R8
 410	ROLL $+5, R8
 411	LEAL 1518500249(R10)(R9*1), R10
 412	ADDL R8, R10
 413
 414	// Load m1
 415	MOVQ m1_base+32(FP), R8
 416	MOVL 56(SP), R9
 417	MOVL R9, 56(R8)
 418
 419	// ROUND1(15)
 420	// LOAD
 421	MOVL   60(DI), R9
 422	BSWAPL R9
 423	MOVL   R9, 60(SP)
 424
 425	// FUNC1
 426	MOVL R13, R15
 427	XORL R12, R15
 428	ANDL R11, R15
 429	XORL R13, R15
 430
 431	// MIX
 432	ROLL $+30, R11
 433	ADDL R15, R14
 434	MOVL R10, R8
 435	ROLL $+5, R8
 436	LEAL 1518500249(R14)(R9*1), R14
 437	ADDL R8, R14
 438
 439	// Load m1
 440	MOVQ m1_base+32(FP), R8
 441	MOVL 60(SP), R9
 442	MOVL R9, 60(R8)
 443
 444	// ROUND1x (steps 16-19) - same as ROUND1 but with no data load.
 445	// ROUND1x(16)
 446	// SHUFFLE
 447	MOVL (SP), R9
 448	XORL 52(SP), R9
 449	XORL 32(SP), R9
 450	XORL 8(SP), R9
 451	ROLL $+1, R9
 452	MOVL R9, (SP)
 453
 454	// FUNC1
 455	MOVL R12, R15
 456	XORL R11, R15
 457	ANDL R10, R15
 458	XORL R12, R15
 459
 460	// MIX
 461	ROLL $+30, R10
 462	ADDL R15, R13
 463	MOVL R14, R8
 464	ROLL $+5, R8
 465	LEAL 1518500249(R13)(R9*1), R13
 466	ADDL R8, R13
 467
 468	// Load m1
 469	MOVQ m1_base+32(FP), R8
 470	MOVL (SP), R9
 471	MOVL R9, 64(R8)
 472
 473	// ROUND1x(17)
 474	// SHUFFLE
 475	MOVL 4(SP), R9
 476	XORL 56(SP), R9
 477	XORL 36(SP), R9
 478	XORL 12(SP), R9
 479	ROLL $+1, R9
 480	MOVL R9, 4(SP)
 481
 482	// FUNC1
 483	MOVL R11, R15
 484	XORL R10, R15
 485	ANDL R14, R15
 486	XORL R11, R15
 487
 488	// MIX
 489	ROLL $+30, R14
 490	ADDL R15, R12
 491	MOVL R13, R8
 492	ROLL $+5, R8
 493	LEAL 1518500249(R12)(R9*1), R12
 494	ADDL R8, R12
 495
 496	// Load m1
 497	MOVQ m1_base+32(FP), R8
 498	MOVL 4(SP), R9
 499	MOVL R9, 68(R8)
 500
 501	// ROUND1x(18)
 502	// SHUFFLE
 503	MOVL 8(SP), R9
 504	XORL 60(SP), R9
 505	XORL 40(SP), R9
 506	XORL 16(SP), R9
 507	ROLL $+1, R9
 508	MOVL R9, 8(SP)
 509
 510	// FUNC1
 511	MOVL R10, R15
 512	XORL R14, R15
 513	ANDL R13, R15
 514	XORL R10, R15
 515
 516	// MIX
 517	ROLL $+30, R13
 518	ADDL R15, R11
 519	MOVL R12, R8
 520	ROLL $+5, R8
 521	LEAL 1518500249(R11)(R9*1), R11
 522	ADDL R8, R11
 523
 524	// Load m1
 525	MOVQ m1_base+32(FP), R8
 526	MOVL 8(SP), R9
 527	MOVL R9, 72(R8)
 528
 529	// ROUND1x(19)
 530	// SHUFFLE
 531	MOVL 12(SP), R9
 532	XORL (SP), R9
 533	XORL 44(SP), R9
 534	XORL 20(SP), R9
 535	ROLL $+1, R9
 536	MOVL R9, 12(SP)
 537
 538	// FUNC1
 539	MOVL R14, R15
 540	XORL R13, R15
 541	ANDL R12, R15
 542	XORL R14, R15
 543
 544	// MIX
 545	ROLL $+30, R12
 546	ADDL R15, R10
 547	MOVL R11, R8
 548	ROLL $+5, R8
 549	LEAL 1518500249(R10)(R9*1), R10
 550	ADDL R8, R10
 551
 552	// Load m1
 553	MOVQ m1_base+32(FP), R8
 554	MOVL 12(SP), R9
 555	MOVL R9, 76(R8)
 556
 557	// ROUND2 (steps 20-39)
 558	// ROUND2(20)
 559	// SHUFFLE
 560	MOVL 16(SP), R9
 561	XORL 4(SP), R9
 562	XORL 48(SP), R9
 563	XORL 24(SP), R9
 564	ROLL $+1, R9
 565	MOVL R9, 16(SP)
 566
 567	// FUNC2
 568	MOVL R11, R15
 569	XORL R12, R15
 570	XORL R13, R15
 571
 572	// MIX
 573	ROLL $+30, R11
 574	ADDL R15, R14
 575	MOVL R10, R8
 576	ROLL $+5, R8
 577	LEAL 1859775393(R14)(R9*1), R14
 578	ADDL R8, R14
 579
 580	// Load m1
 581	MOVQ m1_base+32(FP), R8
 582	MOVL 16(SP), R9
 583	MOVL R9, 80(R8)
 584
 585	// ROUND2(21)
 586	// SHUFFLE
 587	MOVL 20(SP), R9
 588	XORL 8(SP), R9
 589	XORL 52(SP), R9
 590	XORL 28(SP), R9
 591	ROLL $+1, R9
 592	MOVL R9, 20(SP)
 593
 594	// FUNC2
 595	MOVL R10, R15
 596	XORL R11, R15
 597	XORL R12, R15
 598
 599	// MIX
 600	ROLL $+30, R10
 601	ADDL R15, R13
 602	MOVL R14, R8
 603	ROLL $+5, R8
 604	LEAL 1859775393(R13)(R9*1), R13
 605	ADDL R8, R13
 606
 607	// Load m1
 608	MOVQ m1_base+32(FP), R8
 609	MOVL 20(SP), R9
 610	MOVL R9, 84(R8)
 611
 612	// ROUND2(22)
 613	// SHUFFLE
 614	MOVL 24(SP), R9
 615	XORL 12(SP), R9
 616	XORL 56(SP), R9
 617	XORL 32(SP), R9
 618	ROLL $+1, R9
 619	MOVL R9, 24(SP)
 620
 621	// FUNC2
 622	MOVL R14, R15
 623	XORL R10, R15
 624	XORL R11, R15
 625
 626	// MIX
 627	ROLL $+30, R14
 628	ADDL R15, R12
 629	MOVL R13, R8
 630	ROLL $+5, R8
 631	LEAL 1859775393(R12)(R9*1), R12
 632	ADDL R8, R12
 633
 634	// Load m1
 635	MOVQ m1_base+32(FP), R8
 636	MOVL 24(SP), R9
 637	MOVL R9, 88(R8)
 638
 639	// ROUND2(23)
 640	// SHUFFLE
 641	MOVL 28(SP), R9
 642	XORL 16(SP), R9
 643	XORL 60(SP), R9
 644	XORL 36(SP), R9
 645	ROLL $+1, R9
 646	MOVL R9, 28(SP)
 647
 648	// FUNC2
 649	MOVL R13, R15
 650	XORL R14, R15
 651	XORL R10, R15
 652
 653	// MIX
 654	ROLL $+30, R13
 655	ADDL R15, R11
 656	MOVL R12, R8
 657	ROLL $+5, R8
 658	LEAL 1859775393(R11)(R9*1), R11
 659	ADDL R8, R11
 660
 661	// Load m1
 662	MOVQ m1_base+32(FP), R8
 663	MOVL 28(SP), R9
 664	MOVL R9, 92(R8)
 665
 666	// ROUND2(24)
 667	// SHUFFLE
 668	MOVL 32(SP), R9
 669	XORL 20(SP), R9
 670	XORL (SP), R9
 671	XORL 40(SP), R9
 672	ROLL $+1, R9
 673	MOVL R9, 32(SP)
 674
 675	// FUNC2
 676	MOVL R12, R15
 677	XORL R13, R15
 678	XORL R14, R15
 679
 680	// MIX
 681	ROLL $+30, R12
 682	ADDL R15, R10
 683	MOVL R11, R8
 684	ROLL $+5, R8
 685	LEAL 1859775393(R10)(R9*1), R10
 686	ADDL R8, R10
 687
 688	// Load m1
 689	MOVQ m1_base+32(FP), R8
 690	MOVL 32(SP), R9
 691	MOVL R9, 96(R8)
 692
 693	// ROUND2(25)
 694	// SHUFFLE
 695	MOVL 36(SP), R9
 696	XORL 24(SP), R9
 697	XORL 4(SP), R9
 698	XORL 44(SP), R9
 699	ROLL $+1, R9
 700	MOVL R9, 36(SP)
 701
 702	// FUNC2
 703	MOVL R11, R15
 704	XORL R12, R15
 705	XORL R13, R15
 706
 707	// MIX
 708	ROLL $+30, R11
 709	ADDL R15, R14
 710	MOVL R10, R8
 711	ROLL $+5, R8
 712	LEAL 1859775393(R14)(R9*1), R14
 713	ADDL R8, R14
 714
 715	// Load m1
 716	MOVQ m1_base+32(FP), R8
 717	MOVL 36(SP), R9
 718	MOVL R9, 100(R8)
 719
 720	// ROUND2(26)
 721	// SHUFFLE
 722	MOVL 40(SP), R9
 723	XORL 28(SP), R9
 724	XORL 8(SP), R9
 725	XORL 48(SP), R9
 726	ROLL $+1, R9
 727	MOVL R9, 40(SP)
 728
 729	// FUNC2
 730	MOVL R10, R15
 731	XORL R11, R15
 732	XORL R12, R15
 733
 734	// MIX
 735	ROLL $+30, R10
 736	ADDL R15, R13
 737	MOVL R14, R8
 738	ROLL $+5, R8
 739	LEAL 1859775393(R13)(R9*1), R13
 740	ADDL R8, R13
 741
 742	// Load m1
 743	MOVQ m1_base+32(FP), R8
 744	MOVL 40(SP), R9
 745	MOVL R9, 104(R8)
 746
 747	// ROUND2(27)
 748	// SHUFFLE
 749	MOVL 44(SP), R9
 750	XORL 32(SP), R9
 751	XORL 12(SP), R9
 752	XORL 52(SP), R9
 753	ROLL $+1, R9
 754	MOVL R9, 44(SP)
 755
 756	// FUNC2
 757	MOVL R14, R15
 758	XORL R10, R15
 759	XORL R11, R15
 760
 761	// MIX
 762	ROLL $+30, R14
 763	ADDL R15, R12
 764	MOVL R13, R8
 765	ROLL $+5, R8
 766	LEAL 1859775393(R12)(R9*1), R12
 767	ADDL R8, R12
 768
 769	// Load m1
 770	MOVQ m1_base+32(FP), R8
 771	MOVL 44(SP), R9
 772	MOVL R9, 108(R8)
 773
 774	// ROUND2(28)
 775	// SHUFFLE
 776	MOVL 48(SP), R9
 777	XORL 36(SP), R9
 778	XORL 16(SP), R9
 779	XORL 56(SP), R9
 780	ROLL $+1, R9
 781	MOVL R9, 48(SP)
 782
 783	// FUNC2
 784	MOVL R13, R15
 785	XORL R14, R15
 786	XORL R10, R15
 787
 788	// MIX
 789	ROLL $+30, R13
 790	ADDL R15, R11
 791	MOVL R12, R8
 792	ROLL $+5, R8
 793	LEAL 1859775393(R11)(R9*1), R11
 794	ADDL R8, R11
 795
 796	// Load m1
 797	MOVQ m1_base+32(FP), R8
 798	MOVL 48(SP), R9
 799	MOVL R9, 112(R8)
 800
 801	// ROUND2(29)
 802	// SHUFFLE
 803	MOVL 52(SP), R9
 804	XORL 40(SP), R9
 805	XORL 20(SP), R9
 806	XORL 60(SP), R9
 807	ROLL $+1, R9
 808	MOVL R9, 52(SP)
 809
 810	// FUNC2
 811	MOVL R12, R15
 812	XORL R13, R15
 813	XORL R14, R15
 814
 815	// MIX
 816	ROLL $+30, R12
 817	ADDL R15, R10
 818	MOVL R11, R8
 819	ROLL $+5, R8
 820	LEAL 1859775393(R10)(R9*1), R10
 821	ADDL R8, R10
 822
 823	// Load m1
 824	MOVQ m1_base+32(FP), R8
 825	MOVL 52(SP), R9
 826	MOVL R9, 116(R8)
 827
 828	// ROUND2(30)
 829	// SHUFFLE
 830	MOVL 56(SP), R9
 831	XORL 44(SP), R9
 832	XORL 24(SP), R9
 833	XORL (SP), R9
 834	ROLL $+1, R9
 835	MOVL R9, 56(SP)
 836
 837	// FUNC2
 838	MOVL R11, R15
 839	XORL R12, R15
 840	XORL R13, R15
 841
 842	// MIX
 843	ROLL $+30, R11
 844	ADDL R15, R14
 845	MOVL R10, R8
 846	ROLL $+5, R8
 847	LEAL 1859775393(R14)(R9*1), R14
 848	ADDL R8, R14
 849
 850	// Load m1
 851	MOVQ m1_base+32(FP), R8
 852	MOVL 56(SP), R9
 853	MOVL R9, 120(R8)
 854
 855	// ROUND2(31)
 856	// SHUFFLE
 857	MOVL 60(SP), R9
 858	XORL 48(SP), R9
 859	XORL 28(SP), R9
 860	XORL 4(SP), R9
 861	ROLL $+1, R9
 862	MOVL R9, 60(SP)
 863
 864	// FUNC2
 865	MOVL R10, R15
 866	XORL R11, R15
 867	XORL R12, R15
 868
 869	// MIX
 870	ROLL $+30, R10
 871	ADDL R15, R13
 872	MOVL R14, R8
 873	ROLL $+5, R8
 874	LEAL 1859775393(R13)(R9*1), R13
 875	ADDL R8, R13
 876
 877	// Load m1
 878	MOVQ m1_base+32(FP), R8
 879	MOVL 60(SP), R9
 880	MOVL R9, 124(R8)
 881
 882	// ROUND2(32)
 883	// SHUFFLE
 884	MOVL (SP), R9
 885	XORL 52(SP), R9
 886	XORL 32(SP), R9
 887	XORL 8(SP), R9
 888	ROLL $+1, R9
 889	MOVL R9, (SP)
 890
 891	// FUNC2
 892	MOVL R14, R15
 893	XORL R10, R15
 894	XORL R11, R15
 895
 896	// MIX
 897	ROLL $+30, R14
 898	ADDL R15, R12
 899	MOVL R13, R8
 900	ROLL $+5, R8
 901	LEAL 1859775393(R12)(R9*1), R12
 902	ADDL R8, R12
 903
 904	// Load m1
 905	MOVQ m1_base+32(FP), R8
 906	MOVL (SP), R9
 907	MOVL R9, 128(R8)
 908
 909	// ROUND2(33)
 910	// SHUFFLE
 911	MOVL 4(SP), R9
 912	XORL 56(SP), R9
 913	XORL 36(SP), R9
 914	XORL 12(SP), R9
 915	ROLL $+1, R9
 916	MOVL R9, 4(SP)
 917
 918	// FUNC2
 919	MOVL R13, R15
 920	XORL R14, R15
 921	XORL R10, R15
 922
 923	// MIX
 924	ROLL $+30, R13
 925	ADDL R15, R11
 926	MOVL R12, R8
 927	ROLL $+5, R8
 928	LEAL 1859775393(R11)(R9*1), R11
 929	ADDL R8, R11
 930
 931	// Load m1
 932	MOVQ m1_base+32(FP), R8
 933	MOVL 4(SP), R9
 934	MOVL R9, 132(R8)
 935
 936	// ROUND2(34)
 937	// SHUFFLE
 938	MOVL 8(SP), R9
 939	XORL 60(SP), R9
 940	XORL 40(SP), R9
 941	XORL 16(SP), R9
 942	ROLL $+1, R9
 943	MOVL R9, 8(SP)
 944
 945	// FUNC2
 946	MOVL R12, R15
 947	XORL R13, R15
 948	XORL R14, R15
 949
 950	// MIX
 951	ROLL $+30, R12
 952	ADDL R15, R10
 953	MOVL R11, R8
 954	ROLL $+5, R8
 955	LEAL 1859775393(R10)(R9*1), R10
 956	ADDL R8, R10
 957
 958	// Load m1
 959	MOVQ m1_base+32(FP), R8
 960	MOVL 8(SP), R9
 961	MOVL R9, 136(R8)
 962
 963	// ROUND2(35)
 964	// SHUFFLE
 965	MOVL 12(SP), R9
 966	XORL (SP), R9
 967	XORL 44(SP), R9
 968	XORL 20(SP), R9
 969	ROLL $+1, R9
 970	MOVL R9, 12(SP)
 971
 972	// FUNC2
 973	MOVL R11, R15
 974	XORL R12, R15
 975	XORL R13, R15
 976
 977	// MIX
 978	ROLL $+30, R11
 979	ADDL R15, R14
 980	MOVL R10, R8
 981	ROLL $+5, R8
 982	LEAL 1859775393(R14)(R9*1), R14
 983	ADDL R8, R14
 984
 985	// Load m1
 986	MOVQ m1_base+32(FP), R8
 987	MOVL 12(SP), R9
 988	MOVL R9, 140(R8)
 989
 990	// ROUND2(36)
 991	// SHUFFLE
 992	MOVL 16(SP), R9
 993	XORL 4(SP), R9
 994	XORL 48(SP), R9
 995	XORL 24(SP), R9
 996	ROLL $+1, R9
 997	MOVL R9, 16(SP)
 998
 999	// FUNC2
1000	MOVL R10, R15
1001	XORL R11, R15
1002	XORL R12, R15
1003
1004	// MIX
1005	ROLL $+30, R10
1006	ADDL R15, R13
1007	MOVL R14, R8
1008	ROLL $+5, R8
1009	LEAL 1859775393(R13)(R9*1), R13
1010	ADDL R8, R13
1011
1012	// Load m1
1013	MOVQ m1_base+32(FP), R8
1014	MOVL 16(SP), R9
1015	MOVL R9, 144(R8)
1016
1017	// ROUND2(37)
1018	// SHUFFLE
1019	MOVL 20(SP), R9
1020	XORL 8(SP), R9
1021	XORL 52(SP), R9
1022	XORL 28(SP), R9
1023	ROLL $+1, R9
1024	MOVL R9, 20(SP)
1025
1026	// FUNC2
1027	MOVL R14, R15
1028	XORL R10, R15
1029	XORL R11, R15
1030
1031	// MIX
1032	ROLL $+30, R14
1033	ADDL R15, R12
1034	MOVL R13, R8
1035	ROLL $+5, R8
1036	LEAL 1859775393(R12)(R9*1), R12
1037	ADDL R8, R12
1038
1039	// Load m1
1040	MOVQ m1_base+32(FP), R8
1041	MOVL 20(SP), R9
1042	MOVL R9, 148(R8)
1043
1044	// ROUND2(38)
1045	// SHUFFLE
1046	MOVL 24(SP), R9
1047	XORL 12(SP), R9
1048	XORL 56(SP), R9
1049	XORL 32(SP), R9
1050	ROLL $+1, R9
1051	MOVL R9, 24(SP)
1052
1053	// FUNC2
1054	MOVL R13, R15
1055	XORL R14, R15
1056	XORL R10, R15
1057
1058	// MIX
1059	ROLL $+30, R13
1060	ADDL R15, R11
1061	MOVL R12, R8
1062	ROLL $+5, R8
1063	LEAL 1859775393(R11)(R9*1), R11
1064	ADDL R8, R11
1065
1066	// Load m1
1067	MOVQ m1_base+32(FP), R8
1068	MOVL 24(SP), R9
1069	MOVL R9, 152(R8)
1070
1071	// ROUND2(39)
1072	// SHUFFLE
1073	MOVL 28(SP), R9
1074	XORL 16(SP), R9
1075	XORL 60(SP), R9
1076	XORL 36(SP), R9
1077	ROLL $+1, R9
1078	MOVL R9, 28(SP)
1079
1080	// FUNC2
1081	MOVL R12, R15
1082	XORL R13, R15
1083	XORL R14, R15
1084
1085	// MIX
1086	ROLL $+30, R12
1087	ADDL R15, R10
1088	MOVL R11, R8
1089	ROLL $+5, R8
1090	LEAL 1859775393(R10)(R9*1), R10
1091	ADDL R8, R10
1092
1093	// Load m1
1094	MOVQ m1_base+32(FP), R8
1095	MOVL 28(SP), R9
1096	MOVL R9, 156(R8)
1097
1098	// ROUND3 (steps 40-59)
1099	// ROUND3(40)
1100	// SHUFFLE
1101	MOVL 32(SP), R9
1102	XORL 20(SP), R9
1103	XORL (SP), R9
1104	XORL 40(SP), R9
1105	ROLL $+1, R9
1106	MOVL R9, 32(SP)
1107
1108	// FUNC3
1109	MOVL R11, R8
1110	ORL  R12, R8
1111	ANDL R13, R8
1112	MOVL R11, R15
1113	ANDL R12, R15
1114	ORL  R8, R15
1115
1116	// MIX
1117	ROLL $+30, R11
1118	ADDL R15, R14
1119	MOVL R10, R8
1120	ROLL $+5, R8
1121	LEAL 2400959708(R14)(R9*1), R14
1122	ADDL R8, R14
1123
1124	// Load m1
1125	MOVQ m1_base+32(FP), R8
1126	MOVL 32(SP), R9
1127	MOVL R9, 160(R8)
1128
1129	// ROUND3(41)
1130	// SHUFFLE
1131	MOVL 36(SP), R9
1132	XORL 24(SP), R9
1133	XORL 4(SP), R9
1134	XORL 44(SP), R9
1135	ROLL $+1, R9
1136	MOVL R9, 36(SP)
1137
1138	// FUNC3
1139	MOVL R10, R8
1140	ORL  R11, R8
1141	ANDL R12, R8
1142	MOVL R10, R15
1143	ANDL R11, R15
1144	ORL  R8, R15
1145
1146	// MIX
1147	ROLL $+30, R10
1148	ADDL R15, R13
1149	MOVL R14, R8
1150	ROLL $+5, R8
1151	LEAL 2400959708(R13)(R9*1), R13
1152	ADDL R8, R13
1153
1154	// Load m1
1155	MOVQ m1_base+32(FP), R8
1156	MOVL 36(SP), R9
1157	MOVL R9, 164(R8)
1158
1159	// ROUND3(42)
1160	// SHUFFLE
1161	MOVL 40(SP), R9
1162	XORL 28(SP), R9
1163	XORL 8(SP), R9
1164	XORL 48(SP), R9
1165	ROLL $+1, R9
1166	MOVL R9, 40(SP)
1167
1168	// FUNC3
1169	MOVL R14, R8
1170	ORL  R10, R8
1171	ANDL R11, R8
1172	MOVL R14, R15
1173	ANDL R10, R15
1174	ORL  R8, R15
1175
1176	// MIX
1177	ROLL $+30, R14
1178	ADDL R15, R12
1179	MOVL R13, R8
1180	ROLL $+5, R8
1181	LEAL 2400959708(R12)(R9*1), R12
1182	ADDL R8, R12
1183
1184	// Load m1
1185	MOVQ m1_base+32(FP), R8
1186	MOVL 40(SP), R9
1187	MOVL R9, 168(R8)
1188
1189	// ROUND3(43)
1190	// SHUFFLE
1191	MOVL 44(SP), R9
1192	XORL 32(SP), R9
1193	XORL 12(SP), R9
1194	XORL 52(SP), R9
1195	ROLL $+1, R9
1196	MOVL R9, 44(SP)
1197
1198	// FUNC3
1199	MOVL R13, R8
1200	ORL  R14, R8
1201	ANDL R10, R8
1202	MOVL R13, R15
1203	ANDL R14, R15
1204	ORL  R8, R15
1205
1206	// MIX
1207	ROLL $+30, R13
1208	ADDL R15, R11
1209	MOVL R12, R8
1210	ROLL $+5, R8
1211	LEAL 2400959708(R11)(R9*1), R11
1212	ADDL R8, R11
1213
1214	// Load m1
1215	MOVQ m1_base+32(FP), R8
1216	MOVL 44(SP), R9
1217	MOVL R9, 172(R8)
1218
1219	// ROUND3(44)
1220	// SHUFFLE
1221	MOVL 48(SP), R9
1222	XORL 36(SP), R9
1223	XORL 16(SP), R9
1224	XORL 56(SP), R9
1225	ROLL $+1, R9
1226	MOVL R9, 48(SP)
1227
1228	// FUNC3
1229	MOVL R12, R8
1230	ORL  R13, R8
1231	ANDL R14, R8
1232	MOVL R12, R15
1233	ANDL R13, R15
1234	ORL  R8, R15
1235
1236	// MIX
1237	ROLL $+30, R12
1238	ADDL R15, R10
1239	MOVL R11, R8
1240	ROLL $+5, R8
1241	LEAL 2400959708(R10)(R9*1), R10
1242	ADDL R8, R10
1243
1244	// Load m1
1245	MOVQ m1_base+32(FP), R8
1246	MOVL 48(SP), R9
1247	MOVL R9, 176(R8)
1248
1249	// ROUND3(45)
1250	// SHUFFLE
1251	MOVL 52(SP), R9
1252	XORL 40(SP), R9
1253	XORL 20(SP), R9
1254	XORL 60(SP), R9
1255	ROLL $+1, R9
1256	MOVL R9, 52(SP)
1257
1258	// FUNC3
1259	MOVL R11, R8
1260	ORL  R12, R8
1261	ANDL R13, R8
1262	MOVL R11, R15
1263	ANDL R12, R15
1264	ORL  R8, R15
1265
1266	// MIX
1267	ROLL $+30, R11
1268	ADDL R15, R14
1269	MOVL R10, R8
1270	ROLL $+5, R8
1271	LEAL 2400959708(R14)(R9*1), R14
1272	ADDL R8, R14
1273
1274	// Load m1
1275	MOVQ m1_base+32(FP), R8
1276	MOVL 52(SP), R9
1277	MOVL R9, 180(R8)
1278
1279	// ROUND3(46)
1280	// SHUFFLE
1281	MOVL 56(SP), R9
1282	XORL 44(SP), R9
1283	XORL 24(SP), R9
1284	XORL (SP), R9
1285	ROLL $+1, R9
1286	MOVL R9, 56(SP)
1287
1288	// FUNC3
1289	MOVL R10, R8
1290	ORL  R11, R8
1291	ANDL R12, R8
1292	MOVL R10, R15
1293	ANDL R11, R15
1294	ORL  R8, R15
1295
1296	// MIX
1297	ROLL $+30, R10
1298	ADDL R15, R13
1299	MOVL R14, R8
1300	ROLL $+5, R8
1301	LEAL 2400959708(R13)(R9*1), R13
1302	ADDL R8, R13
1303
1304	// Load m1
1305	MOVQ m1_base+32(FP), R8
1306	MOVL 56(SP), R9
1307	MOVL R9, 184(R8)
1308
1309	// ROUND3(47)
1310	// SHUFFLE
1311	MOVL 60(SP), R9
1312	XORL 48(SP), R9
1313	XORL 28(SP), R9
1314	XORL 4(SP), R9
1315	ROLL $+1, R9
1316	MOVL R9, 60(SP)
1317
1318	// FUNC3
1319	MOVL R14, R8
1320	ORL  R10, R8
1321	ANDL R11, R8
1322	MOVL R14, R15
1323	ANDL R10, R15
1324	ORL  R8, R15
1325
1326	// MIX
1327	ROLL $+30, R14
1328	ADDL R15, R12
1329	MOVL R13, R8
1330	ROLL $+5, R8
1331	LEAL 2400959708(R12)(R9*1), R12
1332	ADDL R8, R12
1333
1334	// Load m1
1335	MOVQ m1_base+32(FP), R8
1336	MOVL 60(SP), R9
1337	MOVL R9, 188(R8)
1338
1339	// ROUND3(48)
1340	// SHUFFLE
1341	MOVL (SP), R9
1342	XORL 52(SP), R9
1343	XORL 32(SP), R9
1344	XORL 8(SP), R9
1345	ROLL $+1, R9
1346	MOVL R9, (SP)
1347
1348	// FUNC3
1349	MOVL R13, R8
1350	ORL  R14, R8
1351	ANDL R10, R8
1352	MOVL R13, R15
1353	ANDL R14, R15
1354	ORL  R8, R15
1355
1356	// MIX
1357	ROLL $+30, R13
1358	ADDL R15, R11
1359	MOVL R12, R8
1360	ROLL $+5, R8
1361	LEAL 2400959708(R11)(R9*1), R11
1362	ADDL R8, R11
1363
1364	// Load m1
1365	MOVQ m1_base+32(FP), R8
1366	MOVL (SP), R9
1367	MOVL R9, 192(R8)
1368
1369	// ROUND3(49)
1370	// SHUFFLE
1371	MOVL 4(SP), R9
1372	XORL 56(SP), R9
1373	XORL 36(SP), R9
1374	XORL 12(SP), R9
1375	ROLL $+1, R9
1376	MOVL R9, 4(SP)
1377
1378	// FUNC3
1379	MOVL R12, R8
1380	ORL  R13, R8
1381	ANDL R14, R8
1382	MOVL R12, R15
1383	ANDL R13, R15
1384	ORL  R8, R15
1385
1386	// MIX
1387	ROLL $+30, R12
1388	ADDL R15, R10
1389	MOVL R11, R8
1390	ROLL $+5, R8
1391	LEAL 2400959708(R10)(R9*1), R10
1392	ADDL R8, R10
1393
1394	// Load m1
1395	MOVQ m1_base+32(FP), R8
1396	MOVL 4(SP), R9
1397	MOVL R9, 196(R8)
1398
1399	// ROUND3(50)
1400	// SHUFFLE
1401	MOVL 8(SP), R9
1402	XORL 60(SP), R9
1403	XORL 40(SP), R9
1404	XORL 16(SP), R9
1405	ROLL $+1, R9
1406	MOVL R9, 8(SP)
1407
1408	// FUNC3
1409	MOVL R11, R8
1410	ORL  R12, R8
1411	ANDL R13, R8
1412	MOVL R11, R15
1413	ANDL R12, R15
1414	ORL  R8, R15
1415
1416	// MIX
1417	ROLL $+30, R11
1418	ADDL R15, R14
1419	MOVL R10, R8
1420	ROLL $+5, R8
1421	LEAL 2400959708(R14)(R9*1), R14
1422	ADDL R8, R14
1423
1424	// Load m1
1425	MOVQ m1_base+32(FP), R8
1426	MOVL 8(SP), R9
1427	MOVL R9, 200(R8)
1428
1429	// ROUND3(51)
1430	// SHUFFLE
1431	MOVL 12(SP), R9
1432	XORL (SP), R9
1433	XORL 44(SP), R9
1434	XORL 20(SP), R9
1435	ROLL $+1, R9
1436	MOVL R9, 12(SP)
1437
1438	// FUNC3
1439	MOVL R10, R8
1440	ORL  R11, R8
1441	ANDL R12, R8
1442	MOVL R10, R15
1443	ANDL R11, R15
1444	ORL  R8, R15
1445
1446	// MIX
1447	ROLL $+30, R10
1448	ADDL R15, R13
1449	MOVL R14, R8
1450	ROLL $+5, R8
1451	LEAL 2400959708(R13)(R9*1), R13
1452	ADDL R8, R13
1453
1454	// Load m1
1455	MOVQ m1_base+32(FP), R8
1456	MOVL 12(SP), R9
1457	MOVL R9, 204(R8)
1458
1459	// ROUND3(52)
1460	// SHUFFLE
1461	MOVL 16(SP), R9
1462	XORL 4(SP), R9
1463	XORL 48(SP), R9
1464	XORL 24(SP), R9
1465	ROLL $+1, R9
1466	MOVL R9, 16(SP)
1467
1468	// FUNC3
1469	MOVL R14, R8
1470	ORL  R10, R8
1471	ANDL R11, R8
1472	MOVL R14, R15
1473	ANDL R10, R15
1474	ORL  R8, R15
1475
1476	// MIX
1477	ROLL $+30, R14
1478	ADDL R15, R12
1479	MOVL R13, R8
1480	ROLL $+5, R8
1481	LEAL 2400959708(R12)(R9*1), R12
1482	ADDL R8, R12
1483
1484	// Load m1
1485	MOVQ m1_base+32(FP), R8
1486	MOVL 16(SP), R9
1487	MOVL R9, 208(R8)
1488
1489	// ROUND3(53)
1490	// SHUFFLE
1491	MOVL 20(SP), R9
1492	XORL 8(SP), R9
1493	XORL 52(SP), R9
1494	XORL 28(SP), R9
1495	ROLL $+1, R9
1496	MOVL R9, 20(SP)
1497
1498	// FUNC3
1499	MOVL R13, R8
1500	ORL  R14, R8
1501	ANDL R10, R8
1502	MOVL R13, R15
1503	ANDL R14, R15
1504	ORL  R8, R15
1505
1506	// MIX
1507	ROLL $+30, R13
1508	ADDL R15, R11
1509	MOVL R12, R8
1510	ROLL $+5, R8
1511	LEAL 2400959708(R11)(R9*1), R11
1512	ADDL R8, R11
1513
1514	// Load m1
1515	MOVQ m1_base+32(FP), R8
1516	MOVL 20(SP), R9
1517	MOVL R9, 212(R8)
1518
1519	// ROUND3(54)
1520	// SHUFFLE
1521	MOVL 24(SP), R9
1522	XORL 12(SP), R9
1523	XORL 56(SP), R9
1524	XORL 32(SP), R9
1525	ROLL $+1, R9
1526	MOVL R9, 24(SP)
1527
1528	// FUNC3
1529	MOVL R12, R8
1530	ORL  R13, R8
1531	ANDL R14, R8
1532	MOVL R12, R15
1533	ANDL R13, R15
1534	ORL  R8, R15
1535
1536	// MIX
1537	ROLL $+30, R12
1538	ADDL R15, R10
1539	MOVL R11, R8
1540	ROLL $+5, R8
1541	LEAL 2400959708(R10)(R9*1), R10
1542	ADDL R8, R10
1543
1544	// Load m1
1545	MOVQ m1_base+32(FP), R8
1546	MOVL 24(SP), R9
1547	MOVL R9, 216(R8)
1548
1549	// ROUND3(55)
1550	// SHUFFLE
1551	MOVL 28(SP), R9
1552	XORL 16(SP), R9
1553	XORL 60(SP), R9
1554	XORL 36(SP), R9
1555	ROLL $+1, R9
1556	MOVL R9, 28(SP)
1557
1558	// FUNC3
1559	MOVL R11, R8
1560	ORL  R12, R8
1561	ANDL R13, R8
1562	MOVL R11, R15
1563	ANDL R12, R15
1564	ORL  R8, R15
1565
1566	// MIX
1567	ROLL $+30, R11
1568	ADDL R15, R14
1569	MOVL R10, R8
1570	ROLL $+5, R8
1571	LEAL 2400959708(R14)(R9*1), R14
1572	ADDL R8, R14
1573
1574	// Load m1
1575	MOVQ m1_base+32(FP), R8
1576	MOVL 28(SP), R9
1577	MOVL R9, 220(R8)
1578
1579	// ROUND3(56)
1580	// SHUFFLE
1581	MOVL 32(SP), R9
1582	XORL 20(SP), R9
1583	XORL (SP), R9
1584	XORL 40(SP), R9
1585	ROLL $+1, R9
1586	MOVL R9, 32(SP)
1587
1588	// FUNC3
1589	MOVL R10, R8
1590	ORL  R11, R8
1591	ANDL R12, R8
1592	MOVL R10, R15
1593	ANDL R11, R15
1594	ORL  R8, R15
1595
1596	// MIX
1597	ROLL $+30, R10
1598	ADDL R15, R13
1599	MOVL R14, R8
1600	ROLL $+5, R8
1601	LEAL 2400959708(R13)(R9*1), R13
1602	ADDL R8, R13
1603
1604	// Load m1
1605	MOVQ m1_base+32(FP), R8
1606	MOVL 32(SP), R9
1607	MOVL R9, 224(R8)
1608
1609	// ROUND3(57)
1610	// SHUFFLE
1611	MOVL 36(SP), R9
1612	XORL 24(SP), R9
1613	XORL 4(SP), R9
1614	XORL 44(SP), R9
1615	ROLL $+1, R9
1616	MOVL R9, 36(SP)
1617
1618	// FUNC3
1619	MOVL R14, R8
1620	ORL  R10, R8
1621	ANDL R11, R8
1622	MOVL R14, R15
1623	ANDL R10, R15
1624	ORL  R8, R15
1625
1626	// MIX
1627	ROLL $+30, R14
1628	ADDL R15, R12
1629	MOVL R13, R8
1630	ROLL $+5, R8
1631	LEAL 2400959708(R12)(R9*1), R12
1632	ADDL R8, R12
1633
1634	// Load m1
1635	MOVQ m1_base+32(FP), R8
1636	MOVL 36(SP), R9
1637	MOVL R9, 228(R8)
1638
1639	// Load cs
1640	MOVQ cs_base+56(FP), R8
1641	MOVL R12, 20(R8)
1642	MOVL R13, 24(R8)
1643	MOVL R14, 28(R8)
1644	MOVL R10, 32(R8)
1645	MOVL R11, 36(R8)
1646
1647	// ROUND3(58)
1648	// SHUFFLE
1649	MOVL 40(SP), R9
1650	XORL 28(SP), R9
1651	XORL 8(SP), R9
1652	XORL 48(SP), R9
1653	ROLL $+1, R9
1654	MOVL R9, 40(SP)
1655
1656	// FUNC3
1657	MOVL R13, R8
1658	ORL  R14, R8
1659	ANDL R10, R8
1660	MOVL R13, R15
1661	ANDL R14, R15
1662	ORL  R8, R15
1663
1664	// MIX
1665	ROLL $+30, R13
1666	ADDL R15, R11
1667	MOVL R12, R8
1668	ROLL $+5, R8
1669	LEAL 2400959708(R11)(R9*1), R11
1670	ADDL R8, R11
1671
1672	// Load m1
1673	MOVQ m1_base+32(FP), R8
1674	MOVL 40(SP), R9
1675	MOVL R9, 232(R8)
1676
1677	// ROUND3(59)
1678	// SHUFFLE
1679	MOVL 44(SP), R9
1680	XORL 32(SP), R9
1681	XORL 12(SP), R9
1682	XORL 52(SP), R9
1683	ROLL $+1, R9
1684	MOVL R9, 44(SP)
1685
1686	// FUNC3
1687	MOVL R12, R8
1688	ORL  R13, R8
1689	ANDL R14, R8
1690	MOVL R12, R15
1691	ANDL R13, R15
1692	ORL  R8, R15
1693
1694	// MIX
1695	ROLL $+30, R12
1696	ADDL R15, R10
1697	MOVL R11, R8
1698	ROLL $+5, R8
1699	LEAL 2400959708(R10)(R9*1), R10
1700	ADDL R8, R10
1701
1702	// Load m1
1703	MOVQ m1_base+32(FP), R8
1704	MOVL 44(SP), R9
1705	MOVL R9, 236(R8)
1706
1707	// ROUND4 (steps 60-79)
1708	// ROUND4(60)
1709	// SHUFFLE
1710	MOVL 48(SP), R9
1711	XORL 36(SP), R9
1712	XORL 16(SP), R9
1713	XORL 56(SP), R9
1714	ROLL $+1, R9
1715	MOVL R9, 48(SP)
1716
1717	// FUNC2
1718	MOVL R11, R15
1719	XORL R12, R15
1720	XORL R13, R15
1721
1722	// MIX
1723	ROLL $+30, R11
1724	ADDL R15, R14
1725	MOVL R10, R8
1726	ROLL $+5, R8
1727	LEAL 3395469782(R14)(R9*1), R14
1728	ADDL R8, R14
1729
1730	// Load m1
1731	MOVQ m1_base+32(FP), R8
1732	MOVL 48(SP), R9
1733	MOVL R9, 240(R8)
1734
1735	// ROUND4(61)
1736	// SHUFFLE
1737	MOVL 52(SP), R9
1738	XORL 40(SP), R9
1739	XORL 20(SP), R9
1740	XORL 60(SP), R9
1741	ROLL $+1, R9
1742	MOVL R9, 52(SP)
1743
1744	// FUNC2
1745	MOVL R10, R15
1746	XORL R11, R15
1747	XORL R12, R15
1748
1749	// MIX
1750	ROLL $+30, R10
1751	ADDL R15, R13
1752	MOVL R14, R8
1753	ROLL $+5, R8
1754	LEAL 3395469782(R13)(R9*1), R13
1755	ADDL R8, R13
1756
1757	// Load m1
1758	MOVQ m1_base+32(FP), R8
1759	MOVL 52(SP), R9
1760	MOVL R9, 244(R8)
1761
1762	// ROUND4(62)
1763	// SHUFFLE
1764	MOVL 56(SP), R9
1765	XORL 44(SP), R9
1766	XORL 24(SP), R9
1767	XORL (SP), R9
1768	ROLL $+1, R9
1769	MOVL R9, 56(SP)
1770
1771	// FUNC2
1772	MOVL R14, R15
1773	XORL R10, R15
1774	XORL R11, R15
1775
1776	// MIX
1777	ROLL $+30, R14
1778	ADDL R15, R12
1779	MOVL R13, R8
1780	ROLL $+5, R8
1781	LEAL 3395469782(R12)(R9*1), R12
1782	ADDL R8, R12
1783
1784	// Load m1
1785	MOVQ m1_base+32(FP), R8
1786	MOVL 56(SP), R9
1787	MOVL R9, 248(R8)
1788
1789	// ROUND4(63)
1790	// SHUFFLE
1791	MOVL 60(SP), R9
1792	XORL 48(SP), R9
1793	XORL 28(SP), R9
1794	XORL 4(SP), R9
1795	ROLL $+1, R9
1796	MOVL R9, 60(SP)
1797
1798	// FUNC2
1799	MOVL R13, R15
1800	XORL R14, R15
1801	XORL R10, R15
1802
1803	// MIX
1804	ROLL $+30, R13
1805	ADDL R15, R11
1806	MOVL R12, R8
1807	ROLL $+5, R8
1808	LEAL 3395469782(R11)(R9*1), R11
1809	ADDL R8, R11
1810
1811	// Load m1
1812	MOVQ m1_base+32(FP), R8
1813	MOVL 60(SP), R9
1814	MOVL R9, 252(R8)
1815
1816	// ROUND4(64)
1817	// SHUFFLE
1818	MOVL (SP), R9
1819	XORL 52(SP), R9
1820	XORL 32(SP), R9
1821	XORL 8(SP), R9
1822	ROLL $+1, R9
1823	MOVL R9, (SP)
1824
1825	// FUNC2
1826	MOVL R12, R15
1827	XORL R13, R15
1828	XORL R14, R15
1829
1830	// MIX
1831	ROLL $+30, R12
1832	ADDL R15, R10
1833	MOVL R11, R8
1834	ROLL $+5, R8
1835	LEAL 3395469782(R10)(R9*1), R10
1836	ADDL R8, R10
1837
1838	// Load m1
1839	MOVQ m1_base+32(FP), R8
1840	MOVL (SP), R9
1841	MOVL R9, 256(R8)
1842
1843	// Load cs
1844	MOVQ cs_base+56(FP), R8
1845	MOVL R10, 40(R8)
1846	MOVL R11, 44(R8)
1847	MOVL R12, 48(R8)
1848	MOVL R13, 52(R8)
1849	MOVL R14, 56(R8)
1850
1851	// ROUND4(65)
1852	// SHUFFLE
1853	MOVL 4(SP), R9
1854	XORL 56(SP), R9
1855	XORL 36(SP), R9
1856	XORL 12(SP), R9
1857	ROLL $+1, R9
1858	MOVL R9, 4(SP)
1859
1860	// FUNC2
1861	MOVL R11, R15
1862	XORL R12, R15
1863	XORL R13, R15
1864
1865	// MIX
1866	ROLL $+30, R11
1867	ADDL R15, R14
1868	MOVL R10, R8
1869	ROLL $+5, R8
1870	LEAL 3395469782(R14)(R9*1), R14
1871	ADDL R8, R14
1872
1873	// Load m1
1874	MOVQ m1_base+32(FP), R8
1875	MOVL 4(SP), R9
1876	MOVL R9, 260(R8)
1877
1878	// ROUND4(66)
1879	// SHUFFLE
1880	MOVL 8(SP), R9
1881	XORL 60(SP), R9
1882	XORL 40(SP), R9
1883	XORL 16(SP), R9
1884	ROLL $+1, R9
1885	MOVL R9, 8(SP)
1886
1887	// FUNC2
1888	MOVL R10, R15
1889	XORL R11, R15
1890	XORL R12, R15
1891
1892	// MIX
1893	ROLL $+30, R10
1894	ADDL R15, R13
1895	MOVL R14, R8
1896	ROLL $+5, R8
1897	LEAL 3395469782(R13)(R9*1), R13
1898	ADDL R8, R13
1899
1900	// Load m1
1901	MOVQ m1_base+32(FP), R8
1902	MOVL 8(SP), R9
1903	MOVL R9, 264(R8)
1904
1905	// ROUND4(67)
1906	// SHUFFLE
1907	MOVL 12(SP), R9
1908	XORL (SP), R9
1909	XORL 44(SP), R9
1910	XORL 20(SP), R9
1911	ROLL $+1, R9
1912	MOVL R9, 12(SP)
1913
1914	// FUNC2
1915	MOVL R14, R15
1916	XORL R10, R15
1917	XORL R11, R15
1918
1919	// MIX
1920	ROLL $+30, R14
1921	ADDL R15, R12
1922	MOVL R13, R8
1923	ROLL $+5, R8
1924	LEAL 3395469782(R12)(R9*1), R12
1925	ADDL R8, R12
1926
1927	// Load m1
1928	MOVQ m1_base+32(FP), R8
1929	MOVL 12(SP), R9
1930	MOVL R9, 268(R8)
1931
1932	// ROUND4(68)
1933	// SHUFFLE
1934	MOVL 16(SP), R9
1935	XORL 4(SP), R9
1936	XORL 48(SP), R9
1937	XORL 24(SP), R9
1938	ROLL $+1, R9
1939	MOVL R9, 16(SP)
1940
1941	// FUNC2
1942	MOVL R13, R15
1943	XORL R14, R15
1944	XORL R10, R15
1945
1946	// MIX
1947	ROLL $+30, R13
1948	ADDL R15, R11
1949	MOVL R12, R8
1950	ROLL $+5, R8
1951	LEAL 3395469782(R11)(R9*1), R11
1952	ADDL R8, R11
1953
1954	// Load m1
1955	MOVQ m1_base+32(FP), R8
1956	MOVL 16(SP), R9
1957	MOVL R9, 272(R8)
1958
1959	// ROUND4(69)
1960	// SHUFFLE
1961	MOVL 20(SP), R9
1962	XORL 8(SP), R9
1963	XORL 52(SP), R9
1964	XORL 28(SP), R9
1965	ROLL $+1, R9
1966	MOVL R9, 20(SP)
1967
1968	// FUNC2
1969	MOVL R12, R15
1970	XORL R13, R15
1971	XORL R14, R15
1972
1973	// MIX
1974	ROLL $+30, R12
1975	ADDL R15, R10
1976	MOVL R11, R8
1977	ROLL $+5, R8
1978	LEAL 3395469782(R10)(R9*1), R10
1979	ADDL R8, R10
1980
1981	// Load m1
1982	MOVQ m1_base+32(FP), R8
1983	MOVL 20(SP), R9
1984	MOVL R9, 276(R8)
1985
1986	// ROUND4(70)
1987	// SHUFFLE
1988	MOVL 24(SP), R9
1989	XORL 12(SP), R9
1990	XORL 56(SP), R9
1991	XORL 32(SP), R9
1992	ROLL $+1, R9
1993	MOVL R9, 24(SP)
1994
1995	// FUNC2
1996	MOVL R11, R15
1997	XORL R12, R15
1998	XORL R13, R15
1999
2000	// MIX
2001	ROLL $+30, R11
2002	ADDL R15, R14
2003	MOVL R10, R8
2004	ROLL $+5, R8
2005	LEAL 3395469782(R14)(R9*1), R14
2006	ADDL R8, R14
2007
2008	// Load m1
2009	MOVQ m1_base+32(FP), R8
2010	MOVL 24(SP), R9
2011	MOVL R9, 280(R8)
2012
2013	// ROUND4(71)
2014	// SHUFFLE
2015	MOVL 28(SP), R9
2016	XORL 16(SP), R9
2017	XORL 60(SP), R9
2018	XORL 36(SP), R9
2019	ROLL $+1, R9
2020	MOVL R9, 28(SP)
2021
2022	// FUNC2
2023	MOVL R10, R15
2024	XORL R11, R15
2025	XORL R12, R15
2026
2027	// MIX
2028	ROLL $+30, R10
2029	ADDL R15, R13
2030	MOVL R14, R8
2031	ROLL $+5, R8
2032	LEAL 3395469782(R13)(R9*1), R13
2033	ADDL R8, R13
2034
2035	// Load m1
2036	MOVQ m1_base+32(FP), R8
2037	MOVL 28(SP), R9
2038	MOVL R9, 284(R8)
2039
2040	// ROUND4(72)
2041	// SHUFFLE
2042	MOVL 32(SP), R9
2043	XORL 20(SP), R9
2044	XORL (SP), R9
2045	XORL 40(SP), R9
2046	ROLL $+1, R9
2047	MOVL R9, 32(SP)
2048
2049	// FUNC2
2050	MOVL R14, R15
2051	XORL R10, R15
2052	XORL R11, R15
2053
2054	// MIX
2055	ROLL $+30, R14
2056	ADDL R15, R12
2057	MOVL R13, R8
2058	ROLL $+5, R8
2059	LEAL 3395469782(R12)(R9*1), R12
2060	ADDL R8, R12
2061
2062	// Load m1
2063	MOVQ m1_base+32(FP), R8
2064	MOVL 32(SP), R9
2065	MOVL R9, 288(R8)
2066
2067	// ROUND4(73)
2068	// SHUFFLE
2069	MOVL 36(SP), R9
2070	XORL 24(SP), R9
2071	XORL 4(SP), R9
2072	XORL 44(SP), R9
2073	ROLL $+1, R9
2074	MOVL R9, 36(SP)
2075
2076	// FUNC2
2077	MOVL R13, R15
2078	XORL R14, R15
2079	XORL R10, R15
2080
2081	// MIX
2082	ROLL $+30, R13
2083	ADDL R15, R11
2084	MOVL R12, R8
2085	ROLL $+5, R8
2086	LEAL 3395469782(R11)(R9*1), R11
2087	ADDL R8, R11
2088
2089	// Load m1
2090	MOVQ m1_base+32(FP), R8
2091	MOVL 36(SP), R9
2092	MOVL R9, 292(R8)
2093
2094	// ROUND4(74)
2095	// SHUFFLE
2096	MOVL 40(SP), R9
2097	XORL 28(SP), R9
2098	XORL 8(SP), R9
2099	XORL 48(SP), R9
2100	ROLL $+1, R9
2101	MOVL R9, 40(SP)
2102
2103	// FUNC2
2104	MOVL R12, R15
2105	XORL R13, R15
2106	XORL R14, R15
2107
2108	// MIX
2109	ROLL $+30, R12
2110	ADDL R15, R10
2111	MOVL R11, R8
2112	ROLL $+5, R8
2113	LEAL 3395469782(R10)(R9*1), R10
2114	ADDL R8, R10
2115
2116	// Load m1
2117	MOVQ m1_base+32(FP), R8
2118	MOVL 40(SP), R9
2119	MOVL R9, 296(R8)
2120
2121	// ROUND4(75)
2122	// SHUFFLE
2123	MOVL 44(SP), R9
2124	XORL 32(SP), R9
2125	XORL 12(SP), R9
2126	XORL 52(SP), R9
2127	ROLL $+1, R9
2128	MOVL R9, 44(SP)
2129
2130	// FUNC2
2131	MOVL R11, R15
2132	XORL R12, R15
2133	XORL R13, R15
2134
2135	// MIX
2136	ROLL $+30, R11
2137	ADDL R15, R14
2138	MOVL R10, R8
2139	ROLL $+5, R8
2140	LEAL 3395469782(R14)(R9*1), R14
2141	ADDL R8, R14
2142
2143	// Load m1
2144	MOVQ m1_base+32(FP), R8
2145	MOVL 44(SP), R9
2146	MOVL R9, 300(R8)
2147
2148	// ROUND4(76)
2149	// SHUFFLE
2150	MOVL 48(SP), R9
2151	XORL 36(SP), R9
2152	XORL 16(SP), R9
2153	XORL 56(SP), R9
2154	ROLL $+1, R9
2155	MOVL R9, 48(SP)
2156
2157	// FUNC2
2158	MOVL R10, R15
2159	XORL R11, R15
2160	XORL R12, R15
2161
2162	// MIX
2163	ROLL $+30, R10
2164	ADDL R15, R13
2165	MOVL R14, R8
2166	ROLL $+5, R8
2167	LEAL 3395469782(R13)(R9*1), R13
2168	ADDL R8, R13
2169
2170	// Load m1
2171	MOVQ m1_base+32(FP), R8
2172	MOVL 48(SP), R9
2173	MOVL R9, 304(R8)
2174
2175	// ROUND4(77)
2176	// SHUFFLE
2177	MOVL 52(SP), R9
2178	XORL 40(SP), R9
2179	XORL 20(SP), R9
2180	XORL 60(SP), R9
2181	ROLL $+1, R9
2182	MOVL R9, 52(SP)
2183
2184	// FUNC2
2185	MOVL R14, R15
2186	XORL R10, R15
2187	XORL R11, R15
2188
2189	// MIX
2190	ROLL $+30, R14
2191	ADDL R15, R12
2192	MOVL R13, R8
2193	ROLL $+5, R8
2194	LEAL 3395469782(R12)(R9*1), R12
2195	ADDL R8, R12
2196
2197	// Load m1
2198	MOVQ m1_base+32(FP), R8
2199	MOVL 52(SP), R9
2200	MOVL R9, 308(R8)
2201
2202	// ROUND4(78)
2203	// SHUFFLE
2204	MOVL 56(SP), R9
2205	XORL 44(SP), R9
2206	XORL 24(SP), R9
2207	XORL (SP), R9
2208	ROLL $+1, R9
2209	MOVL R9, 56(SP)
2210
2211	// FUNC2
2212	MOVL R13, R15
2213	XORL R14, R15
2214	XORL R10, R15
2215
2216	// MIX
2217	ROLL $+30, R13
2218	ADDL R15, R11
2219	MOVL R12, R8
2220	ROLL $+5, R8
2221	LEAL 3395469782(R11)(R9*1), R11
2222	ADDL R8, R11
2223
2224	// Load m1
2225	MOVQ m1_base+32(FP), R8
2226	MOVL 56(SP), R9
2227	MOVL R9, 312(R8)
2228
2229	// ROUND4(79)
2230	// SHUFFLE
2231	MOVL 60(SP), R9
2232	XORL 48(SP), R9
2233	XORL 28(SP), R9
2234	XORL 4(SP), R9
2235	ROLL $+1, R9
2236	MOVL R9, 60(SP)
2237
2238	// FUNC2
2239	MOVL R12, R15
2240	XORL R13, R15
2241	XORL R14, R15
2242
2243	// MIX
2244	ROLL $+30, R12
2245	ADDL R15, R10
2246	MOVL R11, R8
2247	ROLL $+5, R8
2248	LEAL 3395469782(R10)(R9*1), R10
2249	ADDL R8, R10
2250
2251	// Load m1
2252	MOVQ m1_base+32(FP), R8
2253	MOVL 60(SP), R9
2254	MOVL R9, 316(R8)
2255
2256	// Add registers to temp hash.
2257	ADDL R10, AX
2258	ADDL R11, BX
2259	ADDL R12, CX
2260	ADDL R13, DX
2261	ADDL R14, BP
2262	ADDQ $+64, DI
2263	CMPQ DI, SI
2264	JB   loop
2265
2266end:
2267	MOVQ dig+0(FP), SI
2268	MOVL AX, (SI)
2269	MOVL BX, 4(SI)
2270	MOVL CX, 8(SI)
2271	MOVL DX, 12(SI)
2272	MOVL BP, 16(SI)
2273	RET