; void fade_image(uint16_t *dest, uint32_t *src, uint16_t fade)
;
; Converts a 640x480 32bit RGBX image to 16bit 565 while scaling every colour
; channel by a fade factor.
;
; - dest points to a 16bit 565 framebuffer (640 * 480 pixels are written)
; - src points to a 32bit RGBX image (640 * 480 pixels are read); each pixel
;   is loaded as one dword with blue in the lowest byte, then green, then red,
;   and an ignored top byte
; - fade is fixed point with 8 fractional bits, range [0, 1]
;   (0 = black, 256 = 1.0 = unchanged); values above 256 are clamped to 256
;
; watcom register calling convention arguments: eax, edx, ebx
; modifies: eax, edx, ebx, flags, mm0, mm1, mm7 (ecx and edi are preserved)

        global fade_image_

fade_image_:
        push    ecx
        push    edi

        mov     ecx, 640 * 480          ; number of pixels left to convert
        mov     edi, eax                ; edi = dest (eax is scratch in the loop)

        ; clamp fade to 1.0: 255 * 256 is the largest product that still fits
        ; the 16 bits pmullw keeps, anything bigger would wrap around
        cmp     ebx, 256
        jbe     .fade_ok
        mov     ebx, 256
.fade_ok:

        ; take fade and duplicate it across all words of mm1
        movd    mm1, ebx                ; mm1 [00|00|00|VV]
        punpckldq mm1, mm1              ; mm1 [00|VV|00|VV]
        packssdw mm1, mm1               ; mm1 [VV|VV|VV|VV]

        ; zero register for the byte -> word unpack; it never changes, so it
        ; is set up once here instead of once per pixel
        pxor    mm7, mm7

.loop:
        ; grab RGB32 pixel and unpack it to zero-extended words in mm0
        movd    mm0, [edx]              ; mm0 bytes [0|0|0|0|X|R|G|B]
        add     edx, 4
        punpcklbw mm0, mm7              ; mm0 words [0X|0R|0G|0B]

        ; multiply by fade and divide by 256 to drop the fractional part
        pmullw  mm0, mm1
        psrlw   mm0, 10                 ; 8 for the div + 2 to make the
                                        ; channels 6 bits wide (666), which
                                        ; is easier to pack into 565

        ; pack result into 565 in ax
        packuswb mm0, mm0               ; words -> bytes, low dword = X,R,G,B
        movd    eax, mm0                ; eax [00XXXXXX|00RRRRRR|00GGGGGG|00BBBBBB]
        mov     ebx, eax

        shr     al, 1                   ; blue in position
                                        ; eax [........|00RRRRRR|00GGGGGG|000BBBBB]
        xor     bl, bl                  ; bx = green << 8
        shr     bx, 3                   ; bx [00000GGG|GGG00000]
        xor     ah, ah
        or      ax, bx                  ; green in position
                                        ; eax [........|00RRRRRR|00000GGG|GGGBBBBB]
        shr     ebx, 6                  ; top 5 red bits now sit in bits 11-15
        and     ebx, 0f800h             ; keep only those red bits
        or      eax, ebx                ; done, ax [RRRRRGGG|GGGBBBBB]

        mov     [edi], ax
        add     edi, 2

        dec     ecx
        jnz     .loop

        emms                            ; clear fpu state

        pop     edi
        pop     ecx
        ret