167 | | " .endr /* end loop */\n" |
| 166 | " movq (%%esi), %%mm1; /* row step of the 8-pixel SAD (repeated once per row): mm1 = 8 pixels from the filtered frame */\n" |
| 167 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the reference frame */\n" |
| 168 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 169 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 170 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 171 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 172 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 173 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 174 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes interleaved with mm7 (widened to words if mm7 is zero) */\n" |
| 175 | " paddusw %%mm1 , %%mm0; /* accumulate the low-half words into mm0 (saturating) */\n" |
| 176 | " paddusw %%mm2 , %%mm0; /* accumulate the high-half words into mm0 (saturating) */\n" |
| 177 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 178 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 179 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 180 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the reference frame */\n" |
| 181 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 182 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 183 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 184 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 185 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 186 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 187 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes interleaved with mm7 (widened to words if mm7 is zero) */\n" |
| 188 | " paddusw %%mm1 , %%mm0; /* accumulate the low-half words into mm0 (saturating) */\n" |
| 189 | " paddusw %%mm2 , %%mm0; /* accumulate the high-half words into mm0 (saturating) */\n" |
| 190 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 191 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 192 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 193 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the reference frame */\n" |
| 194 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 195 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 196 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 197 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 198 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 199 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 200 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes interleaved with mm7 (widened to words if mm7 is zero) */\n" |
| 201 | " paddusw %%mm1 , %%mm0; /* accumulate the low-half words into mm0 (saturating) */\n" |
| 202 | " paddusw %%mm2 , %%mm0; /* accumulate the high-half words into mm0 (saturating) */\n" |
| 203 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 204 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 205 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 206 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the reference frame */\n" |
| 207 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 208 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 209 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 210 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 211 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 212 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 213 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes interleaved with mm7 (widened to words if mm7 is zero) */\n" |
| 214 | " paddusw %%mm1 , %%mm0; /* accumulate the low-half words into mm0 (saturating) */\n" |
| 215 | " paddusw %%mm2 , %%mm0; /* accumulate the high-half words into mm0 (saturating) */\n" |
| 216 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 217 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 218 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 219 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the reference frame */\n" |
| 220 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 221 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 222 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 223 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 224 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 225 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 226 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes interleaved with mm7 (widened to words if mm7 is zero) */\n" |
| 227 | " paddusw %%mm1 , %%mm0; /* accumulate the low-half words into mm0 (saturating) */\n" |
| 228 | " paddusw %%mm2 , %%mm0; /* accumulate the high-half words into mm0 (saturating) */\n" |
| 229 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 230 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 231 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 232 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the reference frame */\n" |
| 233 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 234 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 235 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 236 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 237 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 238 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 239 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes interleaved with mm7 (widened to words if mm7 is zero) */\n" |
| 240 | " paddusw %%mm1 , %%mm0; /* accumulate the low-half words into mm0 (saturating) */\n" |
| 241 | " paddusw %%mm2 , %%mm0; /* accumulate the high-half words into mm0 (saturating) */\n" |
| 242 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 243 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 244 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 245 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the reference frame */\n" |
| 246 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 247 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 248 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 249 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 250 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 251 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 252 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes interleaved with mm7 (widened to words if mm7 is zero) */\n" |
| 253 | " paddusw %%mm1 , %%mm0; /* accumulate the low-half words into mm0 (saturating) */\n" |
| 254 | " paddusw %%mm2 , %%mm0; /* accumulate the high-half words into mm0 (saturating) */\n" |
| 255 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 256 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 257 | |
202 | | " .endr ; /* */\n" |
| 297 | " movq (%%esi), %%mm1; /* row step of the 8-pixel SAD using psadbw (repeated once per row): mm1 = 8 pixels from the filtered frame */\n" |
| 298 | " psadbw (%%edi), %%mm1; /* mm1 = sum of the 8 absolute byte differences against the 8 pixels at edi */\n" |
| 299 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 300 | " addl %%ecx , %%esi; /* advance the first frame pointer by ecx (framewidth) */\n" |
| 301 | " addl %%ecx , %%edi; /* advance the second frame pointer by ecx (framewidth) */\n" |
| 302 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 303 | " psadbw (%%edi), %%mm1; /* mm1 = sum of the 8 absolute byte differences against the 8 pixels at edi */\n" |
| 304 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 305 | " addl %%ecx , %%esi; /* advance the first frame pointer by ecx (framewidth) */\n" |
| 306 | " addl %%ecx , %%edi; /* advance the second frame pointer by ecx (framewidth) */\n" |
| 307 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 308 | " psadbw (%%edi), %%mm1; /* mm1 = sum of the 8 absolute byte differences against the 8 pixels at edi */\n" |
| 309 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 310 | " addl %%ecx , %%esi; /* advance the first frame pointer by ecx (framewidth) */\n" |
| 311 | " addl %%ecx , %%edi; /* advance the second frame pointer by ecx (framewidth) */\n" |
| 312 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 313 | " psadbw (%%edi), %%mm1; /* mm1 = sum of the 8 absolute byte differences against the 8 pixels at edi */\n" |
| 314 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 315 | " addl %%ecx , %%esi; /* advance the first frame pointer by ecx (framewidth) */\n" |
| 316 | " addl %%ecx , %%edi; /* advance the second frame pointer by ecx (framewidth) */\n" |
| 317 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 318 | " psadbw (%%edi), %%mm1; /* mm1 = sum of the 8 absolute byte differences against the 8 pixels at edi */\n" |
| 319 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 320 | " addl %%ecx , %%esi; /* advance the first frame pointer by ecx (framewidth) */\n" |
| 321 | " addl %%ecx , %%edi; /* advance the second frame pointer by ecx (framewidth) */\n" |
| 322 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 323 | " psadbw (%%edi), %%mm1; /* mm1 = sum of the 8 absolute byte differences against the 8 pixels at edi */\n" |
| 324 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 325 | " addl %%ecx , %%esi; /* advance the first frame pointer by ecx (framewidth) */\n" |
| 326 | " addl %%ecx , %%edi; /* advance the second frame pointer by ecx (framewidth) */\n" |
| 327 | |
269 | | " .endr /* end loop */\n" |
| 407 | " movd (%%esi), %%mm1; /* row step of the 4-pixel SAD (repeated once per row): mm1 = 4 pixels from the filtered frame, upper half zeroed by movd */\n" |
| 408 | " movd (%%edi), %%mm2; /* mm2 = 4 pixels from the reference frame, upper half zeroed by movd */\n" |
| 409 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 410 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 411 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 412 | " paddusb %%mm3 , %%mm1; /* mm1 (not mm2) = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 413 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 414 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 415 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes (all zero after the movd loads) interleaved with mm7 */\n" |
| 416 | " paddusw %%mm1 , %%mm2; /* mm2 = low-half words + high-half words (saturating) */\n" |
| 417 | " paddusw %%mm2 , %%mm0; /* accumulate the combined words into mm0 (saturating) */\n" |
| 418 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 419 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 420 | " movd (%%esi), %%mm1; /* mm1 = 4 pixels from the filtered frame, upper half zeroed by movd */\n" |
| 421 | " movd (%%edi), %%mm2; /* mm2 = 4 pixels from the reference frame, upper half zeroed by movd */\n" |
| 422 | " movq %%mm2 , %%mm3; /* mm3 = copy of mm2, because mm2 is still needed as a subtrahend */\n" |
| 423 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating mm2 - mm1 per byte (0 where mm1 >= mm2) */\n" |
| 424 | " psubusb %%mm2 , %%mm1; /* mm1 = saturating mm1 - mm2 per byte (0 where mm2 >= mm1) */\n" |
| 425 | " paddusb %%mm3 , %%mm1; /* mm1 (not mm2) = abs(mm1-mm2) per byte, one of the two terms is always 0 */\n" |
| 426 | " movq %%mm1 , %%mm2; /* copy the byte differences so that both halves can be widened */\n" |
| 427 | " punpcklbw %%mm7 , %%mm1; /* mm1 = its low 4 bytes interleaved with mm7 (presumably zero, set outside this view - confirm) */\n" |
| 428 | " punpckhbw %%mm7 , %%mm2; /* mm2 = its high 4 bytes (all zero after the movd loads) interleaved with mm7 */\n" |
| 429 | " paddusw %%mm1 , %%mm2; /* mm2 = low-half words + high-half words (saturating) */\n" |
| 430 | " paddusw %%mm2 , %%mm0; /* accumulate the combined words into mm0 (saturating) */\n" |
| 431 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 432 | " addl %%ecx , %%edi; /* advance the reference-frame pointer by ecx (framewidth) */\n" |
| 433 | |
358 | | " .rept 8 ; /* */" |
| 540 | |
| 541 | " movq (%%esi), %%mm1; /* row step of the half-pel SAD (repeated once per row): mm1 = 8 pixels from the filtered frame */" |
| 542 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */" |
| 543 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */" |
| 544 | " psrlq $1 , %%mm1; /* halve mm1, the whole quadword is shifted right by one bit */" |
| 545 | " psrlq $1 , %%mm2; /* halve mm2 the same way */" |
| 546 | " pand %%mm6 , %%mm1; /* mask with mm6 (set outside this view, presumably 0x7f in every byte - confirm) */" |
| 547 | " pand %%mm6 , %%mm2; /* so that the bit shifted in from the neighbouring byte is dropped */" |
| 548 | " paddusw %%mm2 , %%mm1; /* mm1 = mm1/2 + mm2/2, the truncated average of both source rows */" |
| 549 | |
| 550 | " movq %%mm3 , %%mm4; /* mm4 = copy of the reference, needed as a subtrahend below */" |
| 551 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating reference - average per byte */" |
| 552 | " psubusb %%mm4 , %%mm1; /* mm1 = saturating average - reference per byte */" |
| 553 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(average - reference) per byte */" |
| 554 | " paddusw %%mm1 , %%mm0; /* add result to mm0 - NOTE(review): byte lanes are added as words without an unpack step, confirm this is intended */" |
| 555 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */" |
| 556 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */" |
| 557 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx (destination must be eax so the stride in ecx stays constant) */" |
| 558 | |
| 559 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */" |
| 560 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */" |
| 561 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */" |
| 562 | " psrlq $1 , %%mm1; /* halve mm1, the whole quadword is shifted right by one bit */" |
| 563 | " psrlq $1 , %%mm2; /* halve mm2 the same way */" |
| 564 | " pand %%mm6 , %%mm1; /* mask with mm6 (set outside this view, presumably 0x7f in every byte - confirm) */" |
| 565 | " pand %%mm6 , %%mm2; /* so that the bit shifted in from the neighbouring byte is dropped */" |
| 566 | " paddusw %%mm2 , %%mm1; /* mm1 = mm1/2 + mm2/2, the truncated average of both source rows */" |
| 567 | |
| 568 | " movq %%mm3 , %%mm4; /* mm4 = copy of the reference, needed as a subtrahend below */" |
| 569 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating reference - average per byte */" |
| 570 | " psubusb %%mm4 , %%mm1; /* mm1 = saturating average - reference per byte */" |
| 571 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(average - reference) per byte */" |
| 572 | " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
| 573 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */" |
| 574 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */" |
| 575 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */" |
| 576 | |
| 577 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */" |
| 578 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */" |
| 579 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */" |
| 580 | " psrlq $1 , %%mm1; /* halve mm1, the whole quadword is shifted right by one bit */" |
| 581 | " psrlq $1 , %%mm2; /* halve mm2 the same way */" |
| 582 | " pand %%mm6 , %%mm1; /* mask with mm6 (set outside this view, presumably 0x7f in every byte - confirm) */" |
| 583 | " pand %%mm6 , %%mm2; /* so that the bit shifted in from the neighbouring byte is dropped */" |
| 584 | " paddusw %%mm2 , %%mm1; /* mm1 = mm1/2 + mm2/2, the truncated average of both source rows */" |
| 585 | |
| 586 | " movq %%mm3 , %%mm4; /* mm4 = copy of the reference, needed as a subtrahend below */" |
| 587 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating reference - average per byte */" |
| 588 | " psubusb %%mm4 , %%mm1; /* mm1 = saturating average - reference per byte */" |
| 589 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(average - reference) per byte */" |
| 590 | " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
| 591 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */" |
| 592 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */" |
| 593 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */" |
| 594 | |
376 | | " .endr ; /* */" |
| 612 | |
| 613 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */" |
| 614 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */" |
| 615 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */" |
| 616 | " psrlq $1 , %%mm1; /* halve mm1, the whole quadword is shifted right by one bit */" |
| 617 | " psrlq $1 , %%mm2; /* halve mm2 the same way */" |
| 618 | " pand %%mm6 , %%mm1; /* mask with mm6 (set outside this view, presumably 0x7f in every byte - confirm) */" |
| 619 | " pand %%mm6 , %%mm2; /* so that the bit shifted in from the neighbouring byte is dropped */" |
| 620 | " paddusw %%mm2 , %%mm1; /* mm1 = mm1/2 + mm2/2, the truncated average of both source rows */" |
| 621 | |
| 622 | " movq %%mm3 , %%mm4; /* mm4 = copy of the reference, needed as a subtrahend below */" |
| 623 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating reference - average per byte */" |
| 624 | " psubusb %%mm4 , %%mm1; /* mm1 = saturating average - reference per byte */" |
| 625 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(average - reference) per byte */" |
| 626 | " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
| 627 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */" |
| 628 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */" |
| 629 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */" |
| 630 | |
| 631 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */" |
| 632 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */" |
| 633 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */" |
| 634 | " psrlq $1 , %%mm1; /* halve mm1, the whole quadword is shifted right by one bit */" |
| 635 | " psrlq $1 , %%mm2; /* halve mm2 the same way */" |
| 636 | " pand %%mm6 , %%mm1; /* mask with mm6 (set outside this view, presumably 0x7f in every byte - confirm) */" |
| 637 | " pand %%mm6 , %%mm2; /* so that the bit shifted in from the neighbouring byte is dropped */" |
| 638 | " paddusw %%mm2 , %%mm1; /* mm1 = mm1/2 + mm2/2, the truncated average of both source rows */" |
| 639 | |
| 640 | " movq %%mm3 , %%mm4; /* mm4 = copy of the reference, needed as a subtrahend below */" |
| 641 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating reference - average per byte */" |
| 642 | " psubusb %%mm4 , %%mm1; /* mm1 = saturating average - reference per byte */" |
| 643 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(average - reference) per byte */" |
| 644 | " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
| 645 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */" |
| 646 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */" |
| 647 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */" |
| 648 | |
| 649 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */" |
| 650 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */" |
| 651 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */" |
| 652 | " psrlq $1 , %%mm1; /* halve mm1, the whole quadword is shifted right by one bit */" |
| 653 | " psrlq $1 , %%mm2; /* halve mm2 the same way */" |
| 654 | " pand %%mm6 , %%mm1; /* mask with mm6 (set outside this view, presumably 0x7f in every byte - confirm) */" |
| 655 | " pand %%mm6 , %%mm2; /* so that the bit shifted in from the neighbouring byte is dropped */" |
| 656 | " paddusw %%mm2 , %%mm1; /* mm1 = mm1/2 + mm2/2, the truncated average of both source rows */" |
| 657 | |
| 658 | " movq %%mm3 , %%mm4; /* mm4 = copy of the reference, needed as a subtrahend below */" |
| 659 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating reference - average per byte */" |
| 660 | " psubusb %%mm4 , %%mm1; /* mm1 = saturating average - reference per byte */" |
| 661 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(average - reference) per byte */" |
| 662 | " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
| 663 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */" |
| 664 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */" |
| 665 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */" |
| 666 | |
| 667 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */" |
| 668 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */" |
| 669 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */" |
| 670 | " psrlq $1 , %%mm1; /* halve mm1, the whole quadword is shifted right by one bit */" |
| 671 | " psrlq $1 , %%mm2; /* halve mm2 the same way */" |
| 672 | " pand %%mm6 , %%mm1; /* mask with mm6 (set outside this view, presumably 0x7f in every byte - confirm) */" |
| 673 | " pand %%mm6 , %%mm2; /* so that the bit shifted in from the neighbouring byte is dropped */" |
| 674 | " paddusw %%mm2 , %%mm1; /* mm1 = mm1/2 + mm2/2, the truncated average of both source rows */" |
| 675 | |
| 676 | " movq %%mm3 , %%mm4; /* mm4 = copy of the reference, needed as a subtrahend below */" |
| 677 | " psubusb %%mm1 , %%mm3; /* mm3 = saturating reference - average per byte */" |
| 678 | " psubusb %%mm4 , %%mm1; /* mm1 = saturating average - reference per byte */" |
| 679 | " paddusb %%mm3 , %%mm1; /* mm1 = abs(average - reference) per byte */" |
| 680 | " paddusw %%mm1 , %%mm0; /* add result to mm0 */" |
| 681 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */" |
| 682 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */" |
| 683 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */" |
| 684 | |
414 | | " .endr ; /* */\n" |
| 742 | |
| 743 | " movq (%%esi), %%mm1; /* row step of the half-pel SAD using pavgb and psadbw (repeated once per row): mm1 = 8 pixels from the filtered frame */\n" |
| 744 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */\n" |
| 745 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */\n" |
| 746 | " pavgb %%mm2 , %%mm1; /* mm1 = per-byte rounded average of the two source rows */\n" |
| 747 | " psadbw %%mm3 , %%mm1; /* mm1 = sum of the 8 absolute byte differences between average and reference */\n" |
| 748 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 749 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 750 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */\n" |
| 751 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */\n" |
| 752 | |
| 753 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 754 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */\n" |
| 755 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */\n" |
| 756 | " pavgb %%mm2 , %%mm1; /* mm1 = per-byte rounded average of the two source rows */\n" |
| 757 | " psadbw %%mm3 , %%mm1; /* mm1 = sum of the 8 absolute byte differences between average and reference */\n" |
| 758 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 759 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 760 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */\n" |
| 761 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */\n" |
| 762 | |
| 763 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 764 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */\n" |
| 765 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */\n" |
| 766 | " pavgb %%mm2 , %%mm1; /* mm1 = per-byte rounded average of the two source rows */\n" |
| 767 | " psadbw %%mm3 , %%mm1; /* mm1 = sum of the 8 absolute byte differences between average and reference */\n" |
| 768 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 769 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 770 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */\n" |
| 771 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */\n" |
| 772 | |
| 773 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 774 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */\n" |
| 775 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */\n" |
| 776 | " pavgb %%mm2 , %%mm1; /* mm1 = per-byte rounded average of the two source rows */\n" |
| 777 | " psadbw %%mm3 , %%mm1; /* mm1 = sum of the 8 absolute byte differences between average and reference */\n" |
| 778 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 779 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 780 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */\n" |
| 781 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */\n" |
| 782 | |
| 783 | " movq (%%esi), %%mm1; /* mm1 = 8 pixels from the filtered frame */\n" |
| 784 | " movq (%%edi), %%mm2; /* mm2 = 8 pixels from the filtered frame (displaced) */\n" |
| 785 | " movq (%%eax), %%mm3; /* mm3 = 8 pixels from the reference frame */\n" |
| 786 | " pavgb %%mm2 , %%mm1; /* mm1 = per-byte rounded average of the two source rows */\n" |
| 787 | " psadbw %%mm3 , %%mm1; /* mm1 = sum of the 8 absolute byte differences between average and reference */\n" |
| 788 | " paddusw %%mm1 , %%mm0; /* accumulate the sum of this row into mm0 (saturating) */\n" |
| 789 | " addl %%ecx , %%esi; /* advance the filtered-frame pointer by ecx (framewidth) */\n" |
| 790 | " addl %%ecx , %%edi; /* advance the displaced filtered-frame pointer by ecx */\n" |
| 791 | " addl %%ecx , %%eax; /* advance the reference-frame pointer by ecx */\n" |
| 792 | |