assembly 使用CPUID获取Ryzen CPU中的缓存大小

z9zf31ra  于 2022-11-13  发布在  其他
关注(0)|答案(1)|浏览(149)

我想使用CPUID指令来获取每个缓存级别(L1、L2、L3)的大小。
我一直在阅读"AMD64 Architecture Programmer’s Manual Volume 3: General-Purpose and System Instructions",在613页有L1的相关信息:

在第616页中,存在与L2和L3相关的信息。

我已经做了一个简单的C+汇编程序来打印这些值。

/*
 * Translate the 4-bit associativity encoding used by the L2 and L3 fields of
 * CPUID leaf 0x80000006 into a human-readable string.
 *
 * code: the raw associativity field (callers mask it with 0xF).
 *
 * Returns a pointer to a static string literal; the caller must not modify or
 * free it. The result is never NULL, so it can be passed straight to "%s".
 * Encodings without a defined meaning (7, and anything above 15) yield
 * "Reserved" instead of aborting the program.
 */
const char* amd_L2_L3_associotivity_str(uint32_t code)
{
    /* Indexed by the raw encoding. Note that the encoding is NOT the way count. */
    static const char *const names[16] = {
        "Disabled",                 /* 0x0 */
        "1 way (direct mapped)",    /* 0x1 */
        "2 way",                    /* 0x2 */
        "3 way",                    /* 0x3 */
        "4 way",                    /* 0x4 */
        "6 way",                    /* 0x5 */
        "8 way",                    /* 0x6 */
        "Reserved",                 /* 0x7 */
        "16 way",                   /* 0x8 */
        /* 0x9: the legacy leaf cannot describe this cache; newer CPUs
         * report this value for L3 and expect leaf 0x8000001D to be used. */
        "Value for all fields should be determined from Fn8000_001D",
        "32 way",                   /* 0xA */
        "48 way",                   /* 0xB */
        "64 way",                   /* 0xC */
        "96 way",                   /* 0xD */
        "128 way",                  /* 0xE */
        "Fully Associative",        /* 0xF */
    };

    if (code >= sizeof names / sizeof names[0]) {
        return "Reserved";
    }
    return names[code];
}

/*
 * Execute CPUID with EAX = 0x80000005 and EAX = 0x80000006 and print the
 * cache fields decoded from the returned ECX/EDX registers.
 *
 * NOTE(review): this is the original (first) version from the question. It
 * reads the size fields from the LOWEST bits of each register and the line
 * size from the highest bits. The values it printed (e.g. a 2 GB L3) are
 * implausible; the field order appears to be reversed relative to the real
 * register layout -- confirm against the AMD manual before reusing this.
 */
void cpuid_caches_amd()
{
    uint32_t eax, ebx, ecx, edx;
    { // L1
        eax = 0x80000005; // CPUID leaf used here for the L1 data/instruction caches

        // EAX is both input (leaf number) and output; EBX/ECX/EDX are outputs only.
        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax)
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        // NOTE(review): size is taken from bits 7:0 and line size from bits
        // 31:24 -- presumably the reverse of the documented layout; verify.
        uint32_t
            dataCache_size = ecx & 0xFF,
            dataCache_associativity = (ecx >> 8) & 0xFF,
            dataCache_linesPerTag = (ecx >> 16) & 0xFF,
            dataCache_lineSize = (ecx >> 24) & 0xFF;

        // Same decoding applied to EDX for the instruction cache.
        uint32_t
            instrCache_size = edx & 0xFF,
            instrCache_associativity = (edx >> 8) & 0xFF,
            instrCache_linesPerTag = (edx >> 16) & 0xFF,
            instrCache_lineSize = (edx >> 24) & 0xFF;

        // NOTE(review): the arguments are uint32_t but the format uses %d.
        printf(
            "L1 Data Cache:\n"
            "\tSize: %d KB\n"
            "\tAssociativity: %d\n"
            "\tLines per Tag: %d\n"
            "\tLine Size: %d B\n"
            "\n"
            "L1 Instruction Cache:\n"
            "\tSize: %d KB\n"
            "\tAssociativity: %d\n"
            "\tLines per Tag: %d\n"
            "\tLine Size: %d B\n"
            ,
            dataCache_size,
            dataCache_associativity,
            dataCache_linesPerTag,
            dataCache_lineSize,
            instrCache_size,
            instrCache_associativity,
            instrCache_linesPerTag,
            instrCache_lineSize
        );
    }

    { // L2, L3
        eax = 0x80000006; // CPUID leaf used here for the L2 (ECX) and L3 (EDX) caches

        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax)
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        // NOTE(review): L2 size is read from bits 15:0 here; presumably it
        // should come from the high half of ECX -- verify.
        uint32_t
            L2_size = ecx & 0xFFFF,
            L2_associativity = (ecx >> 16) & 0xF,
            L2_linesPerTag = (ecx >> 20) & 0xF,
            L2_lineSize = (ecx >> 24) & 0xFF;

        // NOTE(review): the 14-bit mask is applied with no shift, so this
        // reads bits 13:0 of EDX rather than a field in the upper bits.
        uint32_t
            L3_size = edx & 0x3FFF,
            L3_associativity = (edx >> 16) & 0xF,
            L3_linesPerTag = (edx >> 20) & 0xF,
            L3_lineSize = (edx >> 24) & 0xFF;

        // The L3 size field is multiplied by 512 before being printed as KB.
        printf(
            "L2 Cache:\n"
            "\tSize: %d KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %d\n"
            "\tLine Size: %d B\n"
            "\n"
            "L3 Cache:\n"
            "\tSize: %d KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %d\n"
            "\tLine Size: %d B\n"
            ,
            L2_size,
            amd_L2_L3_associotivity_str(L2_associativity),
            L2_linesPerTag,
            L2_lineSize,
            L3_size * 512,
            amd_L2_L3_associotivity_str(L3_associativity),
            L3_linesPerTag,
            L3_lineSize
        );
    }
}

/* Program entry point: print the AMD cache information, then exit with status 0. */
int main()
{
    cpuid_caches_amd();
    return 0;
}

下面是我的Ryzen 3700 X的程序输出:

L1 Data Cache:
        Size: 64 KB
        Associativity: 1
        Lines per Tag: 8
        Line Size: 32 B

L1 Instruction Cache:
        Size: 64 KB
        Associativity: 1
        Lines per Tag: 8
        Line Size: 32 B
L2 Cache:
        Size: 24896 KB
        Associativity: Disabled
        Lines per Tag: 0
        Line Size: 2 B

L3 Cache:
        Size: 2260992 KB
        Associativity: Disabled
        Lines per Tag: 0
        Line Size: 1 B

根据这个输出,我有2GB的L3缓存,这是不可能的。根据官方规格(official specs),它应该有L1:512 KB,L2:4 MB,L3:32 MB的缓存。
另一件让我困惑的事情是L1DcSize只有8位宽。这只允许表示最多255 KB的L1大小,尽管我的CPU应该有512 KB的L1!
我的代码有什么问题?我如何才能得到实际的缓存大小?
编辑:
谢谢大家的回复。正如大家指出的,我把各字段的移位顺序弄反了。另外还有一个关于L1缓存的问题。下面是修改后的代码:

/*
 * Print the cache parameters reported by the AMD extended CPUID leaves
 * 0x80000005 (L1 data / L1 instruction) and 0x80000006 (L2 / L3).
 *
 * Each field is extracted by shifting right by the field's lowest bit
 * position and masking to its width: sizes sit in the highest bits of
 * ECX/EDX and line sizes in the lowest byte. Output goes to stdout;
 * nothing is returned.
 */
void cpuid_caches_amd()
{
    uint32_t eax, ebx, ecx, edx;
    { // L1
        eax = 0x80000005; // CPUID leaf describing the L1 data and instruction caches

        // EAX carries the leaf number in and a result out; EBX/ECX/EDX are outputs.
        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax)
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        // ECX: L1 data cache, one byte per field, size in the top byte.
        uint32_t
            dataCache_size = (ecx >> 24) & 0xFF,
            dataCache_associativity = (ecx >> 16) & 0xFF,
            dataCache_linesPerTag = (ecx >> 8) & 0xFF,
            dataCache_lineSize = ecx & 0xFF;

        // EDX: L1 instruction cache, same layout as ECX.
        uint32_t
            instrCache_size = (edx >> 24) & 0xFF,
            instrCache_associativity = (edx >> 16) & 0xFF,
            instrCache_linesPerTag = (edx >> 8) & 0xFF,
            instrCache_lineSize = edx & 0xFF;

        // The values are unsigned, so print them with %u (cast to match exactly).
        printf(
            "L1 Data Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L1 Instruction Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            (unsigned)dataCache_size,
            (unsigned)dataCache_associativity,
            (unsigned)dataCache_linesPerTag,
            (unsigned)dataCache_lineSize,
            (unsigned)instrCache_size,
            (unsigned)instrCache_associativity,
            (unsigned)instrCache_linesPerTag,
            (unsigned)instrCache_lineSize
        );
    }

    { // L2, L3
        eax = 0x80000006; // CPUID leaf describing the L2 (ECX) and L3 (EDX) caches

        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax)
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        // ECX: L2 size in bits 31:16, associativity code 15:12,
        // lines per tag 11:8, line size 7:0.
        uint32_t
            L2_size = (ecx >> 16) & 0xFFFF,
            L2_associativity = (ecx >> 12) & 0xF,
            L2_linesPerTag = (ecx >> 8) & 0xF,
            L2_lineSize = ecx & 0xFF;

        // EDX: L3 size in bits 31:18 (14 bits); the remaining fields are
        // laid out like the L2 ones.
        uint32_t
            L3_size = (edx >> 18) & 0x3FFF,
            L3_associativity = (edx >> 12) & 0xF,
            L3_linesPerTag = (edx >> 8) & 0xF,
            L3_lineSize = (edx >> 0) & 0xFF;

        // The L3 size field is scaled by 512 to print it in KB; the largest
        // 14-bit value times 512 still fits comfortably in 32 bits.
        printf(
            "L2 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L3 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            (unsigned)L2_size,
            amd_L2_L3_associotivity_str(L2_associativity),
            (unsigned)L2_linesPerTag,
            (unsigned)L2_lineSize,
            (unsigned)(L3_size * 512),
            amd_L2_L3_associotivity_str(L3_associativity),
            (unsigned)L3_linesPerTag,
            (unsigned)L3_lineSize
        );
    }
}

而新的输出:

L1 Data Cache:
        Size: 32 KB
        Associativity: 8
        Lines per Tag: 1
        Line Size: 64 B

L1 Instruction Cache:
        Size: 32 KB
        Associativity: 8
        Lines per Tag: 1
        Line Size: 64 B
L2 Cache:
        Size: 512 KB
        Associativity: 8 way
        Lines per Tag: 1
        Line Size: 64 B

L3 Cache:
        Size: 32768 KB
        Associativity: Value for all fields should be determined from Fn8000_001D
        Lines per Tag: 1
        Line Size: 64 B
dfuffjeb

dfuffjeb1#

看起来你在结果的位布局上犯了错误。大小结果在高位,但你提取的是低位。其他字段的顺序也是相反的。
例如,L3Size位于EDX[31:18]中,但您使用了掩码0x3FFF且没有移位,实际取到的是位13:0。
而不是:

L3_size = edx & 0x3FFF,
L3_associativity = (edx >> 16) & 0xF,
L3_linesPerTag = (edx >> 20) & 0xF,
L3_lineSize = (edx >> 24) & 0xFF;

写上:

L3_size = (edx >> 18) & 0x3FFF,
L3_associativity = (edx >> 12) & 0xF,
L3_linesPerTag = (edx >> 8) & 0xF,
L3_lineSize = (edx >> 0) & 0xFF;

对于其他寄存器也是如此。字段的最低位是你必须移位的量。
至于第二个关于L1大小的问题:规格表中给出的是整个芯片的总量。但是,从CPUID的角度来看,重要的是单个内核的高速缓存。如果将L1i的32 kB和L1d的32 kB相加,再乘以处理器中的8个内核,则得到预期的512 kB。

相关问题