gprofng: 31123 improvements to hardware event implementation

Our hardware counter profiling is based on perf_event_open().
Our HWC tables are absent for new machines.
I have added HWC tables for the following events: PERF_TYPE_HARDWARE,
PERF_TYPE_SOFTWARE, PERF_TYPE_HW_CACHE. Other events require additional fixes.

Did a little cleanup: marked the symbols as static, used StringBuilder,
created a function to read /proc/cpuinfo.

gprofng/ChangeLog
2024-01-08  Vladimir Mezentsev  <vladimir.mezentsev@oracle.com>

	PR gprofng/31123
	* common/core_pcbe.c: Mark the symbols as static. Add events_generic[].
	* common/hwc_cpus.h: Declare a new function read_cpuinfo.
	* common/hwcdrv.c: Add a new parameter in init_perf_event().
	* common/hwcentry.h: Add use_perf_event_type in Hwcentry.
	* common/hwcfuncs.c (process_data_descriptor): Read use_perf_event_type,
	type, config.
	* common/hwctable.c: Add a new HWC table generic_list[].
	* common/opteron_pcbe.c (opt_pcbe_init): Accept AMD machines.
	* src/collctrl.cc: Use StringBuilder in Coll_Ctrl::build_data_desc().
	Add a new function read_cpuinfo.
This commit is contained in:
Vladimir Mezentsev 2024-01-08 22:00:24 -08:00
parent e1cab50d2d
commit 8fe04eeb2c
8 changed files with 293 additions and 247 deletions

View File

@ -2597,102 +2597,95 @@ struct events_table_t
static const struct events_table_t *events_table = NULL;
const struct events_table_t events_fam6_mod23[] = {
static const struct events_table_t events_fam6_mod23[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD23
NT_END
};
const struct events_table_t events_fam6_mod28[] = {
static const struct events_table_t events_fam6_mod28[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD28
NT_END
};
const struct events_table_t events_fam6_mod26[] = {
static const struct events_table_t events_fam6_mod26[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD26
NT_END
};
const struct events_table_t events_fam6_mod46[] = {
static const struct events_table_t events_fam6_mod46[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD26
EVENTS_FAM6_MOD46_ONLY
NT_END
};
const struct events_table_t events_fam6_mod37[] = {
static const struct events_table_t events_fam6_mod37[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD37
EVENTS_FAM6_MOD37_ALSO
NT_END
};
const struct events_table_t events_fam6_mod47[] = {
static const struct events_table_t events_fam6_mod47[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD37
NT_END
};
const struct events_table_t events_fam6_mod42[] = {
static const struct events_table_t events_fam6_mod42[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD42
EVENTS_FAM6_MOD42_ONLY
NT_END
};
const struct events_table_t events_fam6_mod45[] = {
static const struct events_table_t events_fam6_mod45[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD42
EVENTS_FAM6_MOD45_ONLY
NT_END
};
const struct events_table_t events_fam6_mod58[] = {
static const struct events_table_t events_fam6_mod58[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD58
NT_END
};
const struct events_table_t events_fam6_mod62[] = {
static const struct events_table_t events_fam6_mod62[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD58
EVENTS_FAM6_MOD62_ONLY
NT_END
};
const struct events_table_t events_fam6_mod60[] = {
static const struct events_table_t events_fam6_mod60[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD60
NT_END
};
const struct events_table_t events_fam6_mod61[] = {
static const struct events_table_t events_fam6_mod61[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD61
NT_END
};
const struct events_table_t events_fam6_mod78[] = {
static const struct events_table_t events_fam6_mod78[] = {
ARCH_EVENTS
EVENTS_FAM6_MOD78
NT_END
};
const struct events_table_t events_fam6_unknown[] = {
static const struct events_table_t events_fam6_unknown[] = {
ARCH_EVENTS
NT_END
};
const struct events_table_t events_fam_arm[] = {
// ARCH_EVENTS
// *eventnum = pevent->eventselect;
// *eventnum |= (pevent->unitmask << PERFCTR_UMASK_SHIFT);
// *eventnum |= (pevent->attrs << 16);
// *eventnum |= (pevent->cmask << 24);
// eventselect, unitmask, supported_counters, name, cmask, attrs, msr_offset
const struct events_table_t events_generic[] = {
// Hardware event
#define HWE(nm, id) { id, 0, C_ALL, nm, PERF_TYPE_HARDWARE, 0, 0 },
HWE("branch-instructions", PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
@ -2741,13 +2734,20 @@ core_pcbe_init (void)
{
switch (cpuid_getvendor ())
{
case X86_VENDOR_AMD:
snprintf (core_impl_name, sizeof (core_impl_name), "%s", X86_VENDORSTR_AMD);
events_table = events_generic;
num_gpc = 4;
num_ffc = 0;
total_pmc = num_gpc + num_ffc;
return 0;
case ARM_CPU_IMP_ARM:
case ARM_CPU_IMP_BRCM:
case ARM_CPU_IMP_CAVIUM:
case ARM_CPU_IMP_APM:
case ARM_CPU_IMP_QCOM:
snprintf (core_impl_name, sizeof (core_impl_name), "%s", AARCH64_VENDORSTR_ARM);
events_table = events_fam_arm;
events_table = events_generic;
num_gpc = 4; // MEZ: a real implementation is needed
num_ffc = 0;
total_pmc = num_gpc + num_ffc;

View File

@ -23,6 +23,19 @@
#ifndef __HWC_CPUS_H
#define __HWC_CPUS_H
typedef struct
{
int cpu_cnt;
int cpu_clk_freq;
int cpu_model;
int cpu_family;
int cpu_vendor;
char *cpu_vendorstr;
char *cpu_modelstr;
} cpu_info_t;
extern cpu_info_t *read_cpuinfo();
#define MAX_PICS 20 /* Max # of HW ctrs that can be enabled simultaneously */
/* type for specifying CPU register number */
@ -91,6 +104,8 @@
#define CPC_AMD_FAM_10H 2501 /* Barcelona, Shanghai... */
#define CPC_AMD_FAM_11H 2502 /* Griffin... */
#define CPC_AMD_FAM_15H 2503
#define CPC_AMD_Authentic 2504
#define CPC_KPROF 3003 // OBSOLETE (To support 12.3 and earlier)
#define CPC_FOX 3004 /* pseudo-chip */
@ -191,6 +206,7 @@ enum {
{CPC_ULTRA2 , "UltraSPARC I&II"}, \
{CPC_ULTRA1 , "UltraSPARC I&II"}, \
{ARM_CPU_IMP_APM , AARCH64_VENDORSTR_ARM}, \
{CPC_AMD_Authentic , "AuthenticAMD"}, \
{0, NULL}
/* init like this:
static libcpc2_cpu_lookup_t cpu_table[]={LIBCPC2_CPU_LOOKUP_LIST};

View File

@ -675,26 +675,21 @@ dump_perf_event_attr (struct perf_event_attr *at)
}
static void
init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period)
init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period,
Hwcentry *hwce)
{
memset (hw, 0, sizeof (struct perf_event_attr));
hw->size = sizeof (struct perf_event_attr); // fwd/bwd compat
#if defined(__i386__) || defined(__x86_64)
//note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits
hw->size = sizeof (struct perf_event_attr);
if (hwce && hwce->use_perf_event_type)
{
hw->config = hwce->config;
hw->type = hwce->type;
}
else
{ // backward compatibility. The old interface had no 'hwce' argument.
hw->config = event;
hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
#elif defined(__aarch64__)
hw->type = (event >> 24) & 7;
hw->config = event & 0xff;
#elif defined(sparc)
//SPARC needs to be shifted up 16 bits
hw->config = (event & 0xFFFF) << 16; // uint64_t event
uint64_t regs = (event >> 20) & 0xf; // see sparc_pcbe.c
hw->config |= regs << 4; // for M8, supported PICs need to be placed at bits [7:4]
hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw...
#endif
}
hw->sample_period = period;
hw->sample_type = PERF_SAMPLE_IP |
// PERF_SAMPLE_TID |
@ -858,7 +853,7 @@ hdrv_pcl_internal_open ()
perf_event_def_t tmp_event_def;
memset (&tmp_event_def, 0, sizeof (tmp_event_def));
struct perf_event_attr *pe_attr = &tmp_event_def.hw;
init_perf_event (pe_attr, 0, 0);
init_perf_event (pe_attr, 0, 0, NULL);
pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event
pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts
int hwc_fd = perf_event_open (pe_attr,
@ -1283,7 +1278,7 @@ hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef)
glb_event_def->min_time = hwcdef[idx].min_time;
glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? very minor
init_perf_event (&glb_event_def->hw, glb_event_def->eventsel,
glb_event_def->counter_preload);
glb_event_def->counter_preload, hwcdef + idx);
TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld"
"(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n",
idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload,

View File

@ -112,11 +112,12 @@ extern "C"
int timecvt; /* multiplier to convert metric to time, 0 if N/A */
ABST_type memop; /* type of backtracking allowed */
char *short_desc; /* optional one-liner description, or NULL */
int type; /* Type of perf_event_attr */
long long config; /* perf_event_type -specific configuration */
/* the fields above this line are expected, in order, by the tables in hwctable.c */
/* ================================================== */
/* the fields below this line are more flexible */
unsigned int use_perf_event_type : 16; /* Set 1 to use two fields below */
unsigned int type : 16; /* Type of perf_event_attr */
long long config; /* perf_event_type -specific configuration */
int sort_order; /* "tag" to associate experiment record with HWC def */
regno_t *reg_list; /* if not NULL, legal values for <reg_num> field above */
/* Note: reg_list will be terminated by REGNO_ANY */

View File

@ -259,18 +259,11 @@ process_data_descriptor (const char *defstring)
clear_hwcdefs ();
if (!defstring || !strlen (defstring))
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
}
return HWCFUNCS_ERROR_HWCARGS;
ds = strdup (defstring);
if (!ds)
{
err = HWCFUNCS_ERROR_HWCINIT;
goto ext_hw_install_end;
}
return HWCFUNCS_ERROR_HWCINIT;
dsp = ds;
for (idx = 0; idx < MAX_PICS && *dsp; idx++)
{
char *name = NULL;
@ -281,13 +274,33 @@ process_data_descriptor (const char *defstring)
int timecvt = 0;
unsigned sort_order = (unsigned) - 1;
// Read use_perf_event_type, type, config
hwcdef[idx].use_perf_event_type = (int) strtol (dsp, &dsp, 0);
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
break;
}
hwcdef[idx].type = (int) strtol (dsp, &dsp, 0);
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
break;
}
hwcdef[idx].config = strtol (dsp, &dsp, 0);
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
break;
}
/* name */
name = dsp;
dsp = strchr (dsp, ':');
if (dsp == NULL)
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
*dsp++ = (char) 0;
@ -297,7 +310,7 @@ process_data_descriptor (const char *defstring)
if (dsp == NULL)
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
*dsp++ = (char) 0;
@ -306,12 +319,12 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
if (reg < 0 && reg != -1)
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
if (reg >= 0)
hwcdef[idx].reg_num = reg;
@ -321,21 +334,16 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
if (interval < 0)
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
hwcdef[idx].val = interval;
/* min_time */
/*
* This is a new field.
* An old launcher (dbx, etc.) would not include it.
* Detect the presence of the field by the char 'm'.
*/
if (*dsp == 'm')
{
long long tmp_ll = 0;
@ -344,12 +352,12 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
if (tmp_ll < 0)
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
hwcdef[idx].min_time = tmp_ll;
}
@ -361,7 +369,7 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
hwcdef[idx].sort_order = sort_order;
@ -370,7 +378,7 @@ process_data_descriptor (const char *defstring)
if (*dsp++ != ':')
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
hwcdef[idx].timecvt = timecvt;
@ -379,7 +387,7 @@ process_data_descriptor (const char *defstring)
if (*dsp != 0 && *dsp++ != ',')
{
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
break;
}
hwcdef[idx].memop = memop;
if (*name)
@ -394,27 +402,11 @@ process_data_descriptor (const char *defstring)
}
if (*dsp)
{
TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): "
"ctr string had some trailing garbage:"
" '%s'\n", dsp);
err = HWCFUNCS_ERROR_HWCARGS;
goto ext_hw_install_end;
}
free (ds);
hwcdef_cnt = idx;
return 0;
ext_hw_install_end:
if (dsp && *dsp)
{
TprintfT (DBG_LT0, "hwcfuncs: ERROR: process_data_descriptor(): "
" syntax error just before:"
" '%s;\n", dsp);
if (err != 0)
logerr (GTXT ("Data descriptor syntax error near `%s'\n"), dsp);
}
else
logerr (GTXT ("Data descriptor syntax error\n"));
hwcdef_cnt = idx;
free (ds);
return err;
}

View File

@ -23,9 +23,9 @@
#include <errno.h>
#include <string.h>
#include <limits.h>
#include <linux/perf_event.h>
#include "hwcdrv.h"
#include "hwcfuncs.h"
/* TprintfT(<level>,...) definitions. Adjust per module as needed */
#define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings
@ -2367,55 +2367,90 @@ static Hwcentry amd_15h[] = {
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
};
#define USE_ARM_REF_CYCLES \
{"usr_time","cycles", REGNO_ANY, STXT("User CPU"), PRELOADS_85, 1, ABST_NONE}, \
{"sys_time","cycles~system=1~user=0", REGNO_ANY, STXT("System CPU"), PRELOADS_85, 1, ABST_NONE}, \
/* Designated-initializer list for one Hwcentry that is described by a
   perf_event_attr type/config pair: sets name, metric, config and type from
   the arguments, turns on use_perf_event_type, and defaults val to
   PRELOAD_DEF and reg_num to REGNO_ANY.  Callers may append further
   designated fields (e.g. .timecvt, .int_name) after the macro.  */
#define INIT_HWC(nm, mtr, cfg, ty) .name = (nm), .metric = (mtr), \
.config = (cfg), .type = ty, .use_perf_event_type = 1, \
.val = PRELOAD_DEF, .reg_num = REGNO_ANY
/* Entry of type PERF_TYPE_HARDWARE; CFG is used as the config value.  */
#define HWE(nm, mtr, cfg) INIT_HWC(nm, mtr, cfg, PERF_TYPE_HARDWARE)
/* Entry of type PERF_TYPE_SOFTWARE; CFG is used as the config value.  */
#define SWE(nm, mtr, cfg) INIT_HWC(nm, mtr, cfg, PERF_TYPE_SOFTWARE)
/* Entry of type PERF_TYPE_HW_CACHE; the config value is assembled as
   ID | (OP << 8) | (RES << 16).  */
#define HWCE(nm, mtr, id, op, res) \
INIT_HWC(nm, mtr, (id) | ((op) << 8) | ((res) << 16), PERF_TYPE_HW_CACHE)
static Hwcentry armlist[] = {
USE_ARM_REF_CYCLES
static Hwcentry generic_list[] = {
// Hardware event:
{"branch-instructions", NULL, REGNO_ANY, STXT("Branch-instructions"), PRELOADS_35, 0, ABST_NONE},
{"branch-misses", NULL, REGNO_ANY, STXT("Branch-misses"), PRELOADS_35, 0, ABST_NONE},
{"bus-cycles", NULL, REGNO_ANY, STXT("Bus Cycles"), PRELOADS_35, 1, ABST_NONE},
{"cache-misses", NULL, REGNO_ANY, STXT("Cache-misses"), PRELOADS_35, 0, ABST_NONE},
{"cache-references", NULL, REGNO_ANY, STXT("Cache-references"), PRELOADS_35, 0, ABST_NONE},
{"cycles", NULL, REGNO_ANY, STXT("CPU Cycles"), PRELOADS_85, 1, ABST_NONE},
{"insts", "instructions", REGNO_ANY, STXT("Instructions Executed"), PRELOADS_75, 0, ABST_NONE},
{"ref-cycles", NULL, REGNO_ANY, STXT("Total Cycles"), PRELOADS_85, 1, ABST_NONE},
{"stalled-cycles-backend", NULL, REGNO_ANY, STXT("Stalled Cycles during issue."), PRELOADS_85, 1, ABST_NONE},
{"stalled-cycles-frontend", NULL, REGNO_ANY, STXT("Stalled Cycles during retirement."), PRELOADS_85, 1, ABST_NONE},
{ HWE("usr_time", STXT("User CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
.int_name = "cycles" },
{ HWE("sys_time", STXT("System CPU"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1,
.int_name = "cycles~system=1~user=0" },
{ HWE("branch-instructions", STXT("Branch-instructions"),
PERF_COUNT_HW_BRANCH_INSTRUCTIONS) },
{ HWE("branch-misses", STXT("Branch-misses"), PERF_COUNT_HW_BRANCH_MISSES) },
{ HWE("bus-cycles", STXT("Bus Cycles"), PERF_COUNT_HW_BUS_CYCLES),
.timecvt = 1 },
{ HWE("cache-misses", STXT("Cache-misses"), PERF_COUNT_HW_CACHE_MISSES) },
{ HWE("cache-references", STXT("Cache-references"),
PERF_COUNT_HW_CACHE_REFERENCES) },
{ HWE("cycles", STXT("CPU Cycles"), PERF_COUNT_HW_CPU_CYCLES), .timecvt = 1 },
{ HWE("insts", STXT("Instructions Executed"), PERF_COUNT_HW_INSTRUCTIONS),
.int_name = "instructions" },
{ HWE("ref-cycles", STXT("Total Cycles"), PERF_COUNT_HW_REF_CPU_CYCLES),
.timecvt = 1 },
/* Descriptions follow perf_event_open(2): back-end stalls are counted
   during retirement, front-end stalls during issue.  */
{ HWE("stalled-cycles-backend", STXT("Stalled Cycles during retirement."),
      PERF_COUNT_HW_STALLED_CYCLES_BACKEND), .timecvt = 1 },
{ HWE("stalled-cycles-frontend", STXT("Stalled Cycles during issue."),
      PERF_COUNT_HW_STALLED_CYCLES_FRONTEND), .timecvt = 1 },
// Software event:
{"alignment-faults", NULL, REGNO_ANY, STXT("Alignment Faults"), PRELOADS_85, 0, ABST_NONE},
{"context-switches", NULL, REGNO_ANY, STXT("Context Switches"), PRELOADS_85, 0, ABST_NONE},
{"cpu-clock", NULL, REGNO_ANY, STXT("CPU Clock"), PRELOADS_85, 1, ABST_NONE},
{"cpu-migrations", NULL, REGNO_ANY, STXT("CPU Migrations"), PRELOADS_85, 0, ABST_NONE},
{"emulation-faults", NULL, REGNO_ANY, STXT("Emulation Faults"), PRELOADS_85, 0, ABST_NONE},
{"major-faults", NULL, REGNO_ANY, STXT("Major Page Faults"), PRELOADS_85, 0, ABST_NONE},
{"minor-faults", NULL, REGNO_ANY, STXT("Minor Page Faults"), PRELOADS_85, 0, ABST_NONE},
{"page-faults", NULL, REGNO_ANY, STXT("Page Faults"), PRELOADS_85, 0, ABST_NONE},
{"task-clock", NULL, REGNO_ANY, STXT("Clock Count Specific"), PRELOADS_85, 1, ABST_NONE},
{ SWE("alignment-faults", STXT("Alignment Faults"),
PERF_COUNT_SW_ALIGNMENT_FAULTS) },
{ SWE("context-switches", STXT("Context Switches"),
PERF_COUNT_SW_CONTEXT_SWITCHES) },
{ SWE("cpu-clock", STXT("CPU Clock"), PERF_COUNT_SW_CPU_CLOCK),
.timecvt = 1 },
{ SWE("cpu-migrations", STXT("CPU Migrations"),
PERF_COUNT_SW_CPU_MIGRATIONS) },
{ SWE("emulation-faults", STXT("Emulation Faults"),
PERF_COUNT_SW_EMULATION_FAULTS) },
{ SWE("major-faults", STXT("Major Page Faults"),
PERF_COUNT_SW_PAGE_FAULTS_MAJ) },
{ SWE("minor-faults", STXT("Minor Page Faults"),
PERF_COUNT_SW_PAGE_FAULTS_MIN) },
{ SWE("page-faults", STXT("Page Faults"), PERF_COUNT_SW_PAGE_FAULTS) },
{ SWE("task-clock", STXT("Clock Count Specific"), PERF_COUNT_SW_TASK_CLOCK),
.timecvt = 1 },
// Hardware cache event
{"L1-dcache-load-misses", NULL, REGNO_ANY, STXT("L1 D-cache Load Misses"), PRELOADS_35, 0, ABST_NONE},
{"L1-dcache-loads", NULL, REGNO_ANY, STXT("L1 D-cache Loads"), PRELOADS_35, 0, ABST_NONE},
{"L1-dcache-store-misses", NULL, REGNO_ANY, STXT("L1 D-cache Store Misses"), PRELOADS_35, 0, ABST_NONE},
{"L1-dcache-stores", NULL, REGNO_ANY, STXT("L1 D-cache Store Stores"), PRELOADS_35, 0, ABST_NONE},
{"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Load Misses"), PRELOADS_35, 0, ABST_NONE},
{"L1-icache-load-misses", NULL, REGNO_ANY, STXT("L1 Instructions Loads"), PRELOADS_35, 0, ABST_NONE},
{"dTLB-load-misses", NULL, REGNO_ANY, STXT("D-TLB Load Misses"), PRELOADS_35, 0, ABST_NONE},
{"dTLB-loads", NULL, REGNO_ANY, STXT("D-TLB Loads"), PRELOADS_35, 0, ABST_NONE},
{"iTLB-load-misses", NULL, REGNO_ANY, STXT("The Instruction TLB Load Misses"), PRELOADS_35, 0, ABST_NONE},
{"iTLB-loads", NULL, REGNO_ANY, STXT("The Instruction TLB Loads"), PRELOADS_35, 0, ABST_NONE},
{ HWCE("L1-dcache-load-misses", STXT("L1 D-cache Load Misses"),
PERF_COUNT_HW_CACHE_L1D,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("L1-dcache-loads", STXT("L1 D-cache Loads"),
PERF_COUNT_HW_CACHE_L1D,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
/* HWCE arguments are (name, description, cache id, operation, result).
   A store miss is OP_WRITE with RESULT_MISS; passing RESULT_MISS in the
   operation slot would silently select plain write accesses instead.  */
{ HWCE("L1-dcache-store-misses", STXT("L1 D-cache Store Misses"),
       PERF_COUNT_HW_CACHE_L1D,
       PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("L1-dcache-stores", STXT("L1 D-cache Stores"),
       PERF_COUNT_HW_CACHE_L1D,
       PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("L1-icache-load-misses", STXT("L1 Instructions Load Misses"),
       PERF_COUNT_HW_CACHE_L1I,
       PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
/* Read accesses need their own name; reusing "L1-icache-load-misses"
   would make this entry unreachable by name.  */
{ HWCE("L1-icache-loads", STXT("L1 Instructions Loads"),
       PERF_COUNT_HW_CACHE_L1I,
       PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("dTLB-load-misses", STXT("D-TLB Load Misses"),
PERF_COUNT_HW_CACHE_DTLB,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("dTLB-loads", STXT("D-TLB Loads"),
PERF_COUNT_HW_CACHE_DTLB,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{ HWCE("iTLB-load-misses", STXT("The Instruction TLB Load Misses"),
PERF_COUNT_HW_CACHE_ITLB,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS) },
{ HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),
PERF_COUNT_HW_CACHE_ITLB,
PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) },
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
};
static Hwcentry unknownlist[] =
/* used for unrecognized CPU type */{
{NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE}
};
/* structure defining the counters for a CPU type */
typedef struct
{
@ -2485,8 +2520,9 @@ static cpu_list_t cputabs[] = {
{CPC_SPARC64_X, usfuji_X_list, {"insts,,cycles,,dcstall", 0}},
{CPC_SPARC64_XII, usfuji_XII_list, {"insts,,cycles,,dcstall", 0}},
{CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR)
{ARM_CPU_IMP_APM, armlist, {"insts,,cycles", 0}},
{0, unknownlist, {NULL}} /* processor is unknown, but experiment is allowed */
{ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}},
{CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}},
{0, generic_list, {"insts,,cycles", 0}},
};
/*---------------------------------------------------------------------------*/

View File

@ -315,30 +315,25 @@ static int
opt_pcbe_init (void)
{
amd_family = cpuid_getfamily ();
/*
* Make sure this really _is_ an Opteron or Athlon 64 system. The kernel
* loads this module based on its name in the module directory, but it
* could have been renamed.
*/
if (cpuid_getvendor () != X86_VENDOR_AMD
|| (amd_family != OPTERON_FAMILY && amd_family != AMD_FAMILY_10H))
return (-1);
if (cpuid_getvendor () != X86_VENDOR_AMD)
return -1;
/*
* Figure out processor revision here and assign appropriate
* event configuration.
*/
if (amd_family == OPTERON_FAMILY)
switch (amd_family)
{
case OPTERON_FAMILY:
amd_events = opt_events_rev_E;
amd_generic_events = opt_generic_events;
}
else
{
break;
case AMD_FAMILY_10H:
amd_events = family_10h_events;
amd_generic_events = family_10h_generic_events;
break;
}
return (0);
return 0;
}
static uint_t

View File

@ -39,7 +39,7 @@
#include "libiberty.h"
#include "collctrl.h"
#include "hwcdrv.h"
//#include "hwcfuncs.h"
#include "StringBuilder.h"
#define SP_GROUP_HEADER "#analyzer experiment group"
#define DD_MAXPATHLEN (MAXPATHLEN * 4) /* large, to build up data descriptor */
@ -55,7 +55,84 @@ extern const char *strsignal (int);
#define _SC_CPUID_MAX 517
#endif
const char *get_fstype (char *);
static const char *get_fstype (char *);
static cpu_info_t cpu_info;
/* Store in *TO a freshly allocated copy of the value found in FROM.

   FROM is the remainder of a "key : value" line after the key.  Leading
   ':' / blank / tab characters are skipped and trailing blanks, tabs and
   newlines are dropped.  *TO is left untouched when it is already set or
   when nothing remains after trimming.  */
static void
read_str (char *from, char **to)
{
  if (*to != NULL)
    return;

  /* Step over the separator between the key and the value.  */
  char *start = from;
  while (*start == ':' || *start == '\t' || *start == ' ')
    start++;
  if (*start == '\0')
    return;

  /* Shrink the length until the last character is a real one.  */
  size_t len = strlen (start);
  while (len > 0
	 && (start[len - 1] == '\n' || start[len - 1] == ' '
	     || start[len - 1] == '\t'))
    len--;

  if (len > 0)
    *to = strndup (start, len);
}
/* Return the integer that follows the first ':' in FROM.

   The value is read as decimal unless it carries a "0x"/"0X" prefix, in
   which case it is read as hexadecimal (aarch64 prints "CPU implementer"
   that way).  Trailing text such as the fraction in "2400.000" is ignored.
   Returns 0 when there is no ':', no number, or the number does not fit
   in an int.  */
static int
read_int (char *from)
{
  char *val = strchr (from, ':');
  if (val == NULL)
    return 0;
  val++;
  while (*val == ' ' || *val == '\t')
    val++;

  /* Look past an optional sign to decide on the radix.  Base 0 is not
     used on purpose: it would turn a leading zero into octal.  */
  char *digits = val;
  if (*digits == '+' || *digits == '-')
    digits++;
  int base = 10;
  if (digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X'))
    base = 16;

  long num = strtol (val, NULL, base);
  int res = (int) num;
  /* Reject values that were changed by the narrowing to int.  */
  return (long) res == num ? res : 0;
}
/* Return the CPU description held in the file-static cpu_info.

   The first call fills it in; later calls return the same pointer without
   reading anything again.  On aarch64 the clock frequency is first loaded
   from the cntfrq_el0 register.  Then /proc/cpuinfo, if it can be opened,
   is scanned line by line:
     "processor"         counts CPUs
     "cpu MHz"           clock frequency
     "cpu family"        family         ("CPU implementer" is stored here too)
     "model"             model          ("CPU architecture" is stored here too)
     "vendor_id"         vendor string  (first occurrence only)
     "model name"        model string   (first occurrence only)
   A missing /proc/cpuinfo is not an error; the fields keep their initial
   values.  */
cpu_info_t *
read_cpuinfo()
{
  static int inited = 0;
  if (inited)
    return &cpu_info;
  inited = 1;

#if defined(__aarch64__)
  asm volatile("mrs %0, cntfrq_el0" : "=r" (cpu_info.cpu_clk_freq));
#endif

  // Read /proc/cpuinfo to get CPU info and clock rate
  FILE *procf = fopen ("/proc/cpuinfo", "r");
  if (procf == NULL)
    return &cpu_info;

  char temp[1024];
  while (fgets (temp, (int) sizeof (temp), procf) != NULL)
    {
      /* Each helper is handed the text right after the matched key; it
	 locates the ':' separator itself.  Never skip more characters than
	 were compared, or a short line would be read past its end.  */
      if (strncmp (temp, "processor", 9) == 0)
	cpu_info.cpu_cnt++;
      else if (strncmp (temp, "cpu MHz", 7) == 0)
	cpu_info.cpu_clk_freq = read_int (temp + 7);
      else if (strncmp (temp, "cpu family", 10) == 0)
	cpu_info.cpu_family = read_int (temp + 10);
      else if (strncmp (temp, "vendor_id", 9) == 0)
	{
	  if (cpu_info.cpu_vendorstr == NULL)
	    read_str (temp + 9, &cpu_info.cpu_vendorstr);
	}
      /* "model name" has to be tested before its prefix "model".  */
      else if (strncmp (temp, "model name", 10) == 0)
	{
	  if (cpu_info.cpu_modelstr == NULL)
	    read_str (temp + 10, &cpu_info.cpu_modelstr);
	}
      else if (strncmp (temp, "model", 5) == 0)
	cpu_info.cpu_model = read_int (temp + 5);
      else if (strncmp (temp, "CPU implementer", 15) == 0)
	cpu_info.cpu_family = read_int (temp + 15);
      else if (strncmp (temp, "CPU architecture", 16) == 0)
	cpu_info.cpu_model = read_int (temp + 16);
    }
  fclose (procf);
  return &cpu_info;
}
Coll_Ctrl::Coll_Ctrl (int _interactive, bool _defHWC, bool _kernelHWC)
{
@ -81,59 +158,9 @@ Coll_Ctrl::Coll_Ctrl (int _interactive, bool _defHWC, bool _kernelHWC)
/* add 2048 to count, since on some systems CPUID does not start at zero */
ncpumax = ncpus + 2048;
}
ncpus = 0;
cpu_clk_freq = 0;
// On Linux, read /proc/cpuinfo to get CPU count and clock rate
// Note that parsing is different on SPARC and x86
#if defined(sparc)
FILE *procf = fopen ("/proc/cpuinfo", "r");
if (procf != NULL)
{
char temp[1024];
while (fgets (temp, (int) sizeof (temp), procf) != NULL)
{
if (strncmp (temp, "Cpu", 3) == 0 && temp[3] != '\0'
&& strncmp ((strchr (temp + 1, 'C')) ? strchr (temp + 1, 'C')
: (temp + 4), "ClkTck", 6) == 0)
{
ncpus++;
char *val = strchr (temp, ':');
if (val)
{
unsigned long long freq;
sscanf (val + 2, "%llx", &freq);
cpu_clk_freq = (unsigned int) (((double) freq) / 1000000.0 + 0.5);
}
else
cpu_clk_freq = 0;
}
}
fclose (procf);
}
#elif defined(__aarch64__)
asm volatile("mrs %0, cntfrq_el0" : "=r" (cpu_clk_freq));
#else
FILE *procf = fopen ("/proc/cpuinfo", "r");
if (procf != NULL)
{
char temp[1024];
while (fgets (temp, (int) sizeof (temp), procf) != NULL)
{
// x86 Linux
if (strncmp (temp, "processor", 9) == 0)
ncpus++;
else if (strncmp (temp, "cpu MHz", 7) == 0)
{
char *val = strchr (temp, ':');
cpu_clk_freq = val ? atoi (val + 1) : 0;
}
}
fclose (procf);
}
#endif
cpu_info_t *cpu_p = read_cpuinfo();
ncpus = cpu_p->cpu_cnt;
cpu_clk_freq = cpu_p->cpu_clk_freq;
/* check resolution of system clock */
sys_resolution = sysconf (_SC_CLK_TCK);
@ -1720,78 +1747,62 @@ Coll_Ctrl::set_size_limit (const char *string)
void
Coll_Ctrl::build_data_desc ()
{
char spec[DD_MAXPATHLEN];
spec[0] = 0;
StringBuilder sb;
// Put sample sig before clock profiling. Dbx uses PROF
// for that purpose and we want it to be processed first.
if (project_home)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "P:%s;", project_home);
sb.appendf ("P:%s;", project_home);
if (sample_sig != 0)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "g:%d;", sample_sig);
sb.appendf ("g:%d;", sample_sig);
if (pauseresume_sig != 0)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "d:%d%s;", pauseresume_sig,
(pauseresume_pause == 1 ? "p" : ""));
sb.appendf ("d:%d%s;", pauseresume_sig, pauseresume_pause == 1 ? "p" : "");
if (clkprof_enabled == 1)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "p:%d;", clkprof_timer);
sb.appendf ("p:%d;", clkprof_timer);
if (synctrace_enabled == 1)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "s:%d,%d;", synctrace_thresh, synctrace_scope);
sb.appendf ("s:%d,%d;", synctrace_thresh, synctrace_scope);
if (heaptrace_enabled == 1)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "H:%d;", heaptrace_checkenabled);
sb.appendf ("H:%d;", heaptrace_checkenabled);
if (iotrace_enabled == 1)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "i:;");
sb.append ("i:;");
if (hwcprof_enabled_cnt > 0)
{
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "h:%s",
(hwcprof_default == true) ? "*" : "");
sb.appendf ("h:%s", (hwcprof_default == true) ? "*" : "");
for (int ii = 0; ii < hwcprof_enabled_cnt; ii++)
{
/* min_time is a "new" field.
*
* To help process_data_descriptor() in hwcfuncs.c parse
* the HWC portion of this string -- specifically, to
* recognize min_time when it's present and skip over
* when it's not -- we prepend 'm' to the min_time value.
*
* When we no longer worry about, say, an old dbx
* writing this string and a new libcollector looking for
* the min_time field, the 'm' character can be
* removed and process_data_descriptor() simplified.
*/
hrtime_t min_time = hwctr[ii].min_time;
Hwcentry *h = hwctr + ii;
hrtime_t min_time = h->min_time;
if (min_time == HWCTIME_TBD)
// user did not specify any value for overflow rate
min_time = hwctr[ii].min_time_default;
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec),
"%s%s:%s:%d:%d:m%lld:%d:%d:0x%x", ii ? "," : "",
strcmp (hwctr[ii].name, hwctr[ii].int_name) ? hwctr[ii].name : "",
hwctr[ii].int_name, hwctr[ii].reg_num, hwctr[ii].val,
min_time, ii, /*tag*/ hwctr[ii].timecvt, hwctr[ii].memop);
min_time = h->min_time_default;
if (ii > 0)
sb.append (',');
sb.appendf ("%d:%d:%lld:%s:%s:%lld:%d:m%lld:%d:%d:0x%x",
h->use_perf_event_type, h->type, (long long) h->config,
strcmp (h->name, h->int_name) ? h->name : "",
h->int_name, (long long) h->reg_num, h->val,
(long long) min_time, ii, /*tag*/ h->timecvt, h->memop);
}
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), ";");
sb.append (";");
}
if ((time_run != 0) || (start_delay != 0))
if (time_run != 0 || start_delay != 0)
{
if (start_delay != 0)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "t:%d:%d;", start_delay, time_run);
sb.appendf ("t:%d:%d;", start_delay, time_run);
else
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "t:%d;", time_run);
sb.appendf ("t:%d;", time_run);
}
if (sample_period != 0)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "S:%d;",
sample_period);
sb.appendf ("S:%d;", sample_period);
if (size_limit != 0)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "L:%d;",
size_limit);
sb.appendf ("L:%d;", size_limit);
if (java_mode != 0)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "j:%d;", (int) java_mode);
sb.appendf ("j:%d;", (int) java_mode);
if (follow_mode != FOLLOW_NONE)
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "F:%d;", (int) follow_mode);
snprintf (spec + strlen (spec), sizeof (spec) - strlen (spec), "a:%s;", archive_mode);
if (strlen (spec) + 1 >= sizeof (spec))
abort ();
sb.appendf ("F:%d;", (int) follow_mode);
sb.appendf ("a:%s;", archive_mode);
free (data_desc);
data_desc = strdup (spec);
data_desc = sb.toString ();
}
char *