In the attached version (source file and release exe), the code now prints the processor name, together with the Intel microarchitecture, if it's one that I know about (i.e., was listed in the current and 2007 Intel Optimization references). I also found and fixed a bug where the code mistakenly reused the SSE2 bit to indicate the presence of SSE3.
Unfortunately for some of you, my code still has inline assembly.
One thing that I had trouble with was trying to allocate heap memory for a couple of strings. When I allocated them with either malloc() or calloc(), I couldn't free() the pointers without generating a bad pointer exception. Things worked fine if I didn't try to free the pointers, but it's not good practice to allocate heap storage without freeing it when you're done. All this was happening with VS 10. I was able to work around the problem by using _malloca() and _freea(), but I have no idea why the older run-time library functions were acting up.
Below are the functions that contain inline assembly, together with a summary of how much or how little they changed.
cpu_info function - unchanged from before except for description comment.
// Get version and feature information from cpuid, leaf EAX = 1.
void cpu_info(struct CpuData *returnData)
{
__asm{
push ebx
mov eax, 1H
cpuid
mov ebx, returnData
; After the call to cpuid, store EAX, ECX, and EDX in the CpuData struct.
mov (dword ptr [ebx]).eaxReg, eax
mov (dword ptr [ebx]).ecxReg, ecx
mov (dword ptr[ebx]).edxReg, edx
pop ebx
}
}
printCpuidData function - logic changed extensively.
// Now check for support for AVX2.
// AVX2 is only usable when AVX itself is usable: the CPU must report AVX and
// OSXSAVE, and the OS must have enabled BOTH XMM state (bit 1) and YMM state
// (bit 2) in XCR0.  A plain non-zero test of (OSFlags & 0x6) would also
// accept an OS that enabled only one of the two, so compare against 0x6.
// NOTE(review): OSFlags is presumably the XCR0 value obtained from
// AVXSupportInOS() - confirm in the part of this function not shown here.
if (AVX && OSXSAVE && ((OSFlags & 0x6) == 0x6))
{
    // Call cpuid with EAX = 7 and ECX = 0 (structured extended features,
    // sub-leaf 0).  If bit 5 in the returned EBX is 1, AVX2 is supported.
    __asm{
        push eax
        push ecx
        mov eax, 7
        xor ecx, ecx            ; sub-leaf 0
        cpuid
        mov cacheEBX, ebx       ; keep the feature flags for the C code below
        pop ecx
        pop eax
    }
    AVX2 = (cacheEBX & 0x20) >> 5;      // isolate bit 5 as 0 or 1
    printf("CPU and OS %s AVX2.\n", AVX2 ? "support" : "do not support");
}
else printf("CPU and OS do not support AVX2.\n");
AVXSupportInOS function - no change.
int AVXSupportInOS()
{
// Read the contents of the extended control register (XCR) specified
// in the ECX register.
// Currently, only XCR0 is supported, so set ECX to zero before
// executing xgetbv (get value of extended control register.
// If bits 1 and 2 are set, XMM state and YMM state are enabled by the OS.
int OSFlags;
__asm{
xor ecx, ecx
xgetbv
mov OSFlags, eax
}
return OSFlags;
}
cpu_name function - This is new, and is based on the code that TWell and dedndave contributed in this thread.
void cpu_name(char *name)
{
__asm{
push ebx
mov edi, dword ptr name
mov dword ptr [edi], 0
mov eax, 80000000h
cpuid
cmp eax, 80000004h ; If eax < 80000004h, processor name isn't supported.
jna pass
xor esi, esi
loop1:
lea eax, [esi + 80000002h]
cpuid
mov dword ptr [edi], eax
mov dword ptr [edi+4], ebx
mov dword ptr [edi+8], ecx
mov dword ptr [edi+12], edx
add edi, 16
inc esi
cmp esi, 3
jb loop1
pass:
pop ebx
}
}