Who can help optimize the following C code to make it faster on a Windows XP 32-bit platform? _capacity may be a very large number, such as 3200000, so how can the loop be made more efficient? Code: unsigned __int64 *p = (unsigned __int64 *)_data; unsigned __int64 *q = (unsigned __int64 *)bitmap._data; for ( int i = _capacity; i > 0 ; i-= 32 ) { *p |= *q; *(p+1) |= *(q+1); *(p+2) |= *(q+2); *(p+3) |= *(q+3); *(p+4) |= *(q+4); *(p+5) |= *(q+5); *(p+6) |= *(q+6); *(p+7) |= *(q+7); *(p+8) |= *(q+8); *(p+9) |= *(q+9); *(p+10) |= *(q+10); *(p+11) |= *(q+11); *(p+12) |= *(q+12); *(p+13) |= *(q+13); *(p+14) |= *(q+14); *(p+15) |= *(q+15); *(p+16) |= *(q+16); *(p+17) |= *(q+17); *(p+18) |= *(q+18); *(p+19) |= *(q+19); *(p+20) |= *(q+20); *(p+21) |= *(q+21); *(p+22) |= *(q+22); *(p+23) |= *(q+23); *(p+24) |= *(q+24); *(p+25) |= *(q+25); *(p+26) |= *(q+26); *(p+27) |= *(q+27); *(p+28) |= *(q+28); *(p+29) |= *(q+29); *(p+30) |= *(q+30); *(p+31) |= *(q+31); p+=32; q+=32; }