以下为引用: global_fast_memcpy9 %defineparamesp124 %definesrcparam0 %definedstparam4 %definelenparam8 %defineCACHEBLOCK400h _fast_memcpy9: pushesi pushedi pushebx movesi,[src];sourcearray movedi,[dst];destinationarray movecx,[len];numbero
; Quoted listing follows.
;-----------------------------------------------------------------------
; _fast_memcpy9 -- block-prefetching MMX copy using non-temporal stores
;
; Syntax: NASM, 32-bit x86.  Needs MMX plus movntq / prefetchnta / sfence.
; C view (types presumed -- confirm against the caller's prototype):
;       void fast_memcpy9(const void *src, void *dst, unsigned len);
;
; In:    [esp+4]  = src   source address
;        [esp+8]  = dst   destination address
;        [esp+12] = len   byte count (any value, including 0)
; Out:   nothing meaningful is returned in eax
; Saves: esi, edi, ebx (pushed/popped here)
; Clobb: eax, ecx, flags, mm0-mm7 (emms is issued after the MMX path)
; Stack: arguments are left in place (plain `ret`), caller cleans up.
;
; Strategy: len is split into whole blocks of CACHEBLOCK QWORDs
; (CACHEBLOCK*8 = 8192 bytes) and a tail.  Whole blocks are first pulled
; into cache by touching one dword per 64-byte line, then streamed out
; with movntq.  The tail (len mod 8192 bytes) is finished with rep movsb,
; so len no longer has to be a non-zero multiple of the block size.
; Source and destination are assumed not to overlap.
;-----------------------------------------------------------------------
global  _fast_memcpy9

%define param           esp+12+4        ; skip 3 saved registers + return address
%define src             param+0
%define dst             param+4
%define len             param+8
%define CACHEBLOCK      400h            ; QWORDs per block (8 KiB)

_fast_memcpy9:
        push    esi
        push    edi
        push    ebx
        cld                             ; rep movsb in .tail must count upwards

        mov     esi, [src]              ; source array
        mov     edi, [dst]              ; destination array
        mov     ecx, [len]              ; byte count ...
        shr     ecx, 3                  ; ... converted to QWORDs
        and     ecx, -CACHEBLOCK        ; keep only QWORDs that form whole blocks
        jz      .tail                   ; no whole block: skip the MMX path entirely

        lea     esi, [esi+ecx*8]        ; end of the block-copied source region
        lea     edi, [edi+ecx*8]        ; end of the block-copied destination region
        neg     ecx                     ; negative QWORD offset = pointer + loop counter

.mainloop:
        mov     eax, CACHEBLOCK / 16    ; .prefetchloop handles 2 cache lines per pass
        add     ecx, CACHEBLOCK         ; move up to the end of this block

.prefetchloop:
        mov     ebx, [esi+ecx*8-64]     ; touch one address in this cache line ...
        mov     ebx, [esi+ecx*8-128]    ; ... and one in the previous line
        sub     ecx, 16                 ; 16 QWORDs = two 64-byte cache lines
        dec     eax
        jnz     .prefetchloop           ; on exit ecx is back at the block start

        mov     eax, CACHEBLOCK / 8     ; 8 QWORDs (64 bytes) per .writeloop pass

.writeloop:
        prefetchnta [esi+ecx*8+512]     ; fetch ahead by 512 bytes
        movq    mm0, qword [esi+ecx*8]
        movq    mm1, qword [esi+ecx*8+8]
        movq    mm2, qword [esi+ecx*8+16]
        movq    mm3, qword [esi+ecx*8+24]
        movq    mm4, qword [esi+ecx*8+32]
        movq    mm5, qword [esi+ecx*8+40]
        movq    mm6, qword [esi+ecx*8+48]
        movq    mm7, qword [esi+ecx*8+56]
        movntq  qword [edi+ecx*8], mm0
        movntq  qword [edi+ecx*8+8], mm1
        movntq  qword [edi+ecx*8+16], mm2
        movntq  qword [edi+ecx*8+24], mm3
        movntq  qword [edi+ecx*8+32], mm4
        movntq  qword [edi+ecx*8+40], mm5
        movntq  qword [edi+ecx*8+48], mm6
        movntq  qword [edi+ecx*8+56], mm7
        add     ecx, 8
        dec     eax
        jnz     .writeloop

        test    ecx, ecx                ; offset reaches 0 after the last whole block
        jnz     .mainloop

        sfence                          ; flush write-combining buffers
        emms                            ; leave MMX state so x87 code can run

.tail:
        ; esi/edi now point just past the block-copied region (or at the
        ; original src/dst when there was no whole block).
        mov     ecx, [len]
        and     ecx, CACHEBLOCK*8 - 1   ; bytes left over after whole blocks
        rep movsb                       ; no-op when ecx == 0

        pop     ebx
        pop     edi
        pop     esi
        ret
每个人都需要一台速度更快、更稳定的 PC。随着时间的推移,垃圾文件、旧注册表数据和不必要的后台进程会占用资源并降低性能。幸运的是,许多工具可以让 Windows 保持平稳运行。
Copyright 2014-2025 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号