-
Notifications
You must be signed in to change notification settings - Fork 350
Telemetry2 thread info #9084
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Telemetry2 thread info #9084
Changes from 1 commit
c733f2a
0acff86
2f7a713
aad8ab4
ab028a0
7747c68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Adds code for collecting thread statistic for cpu and stack usage, and for writing that information to telemetry2 memory chuck. Signed-off-by: Jyri Sarha <jyri.sarha@linux.intel.com>
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,7 @@ | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
|
|
||
| add_local_sources_ifdef(CONFIG_SOF_TELEMETRY sof telemetry.c) | ||
|
|
||
| add_local_sources_ifdef(CONFIG_SOF_TELEMETRY2_SLOT sof telemetry2_slot.c) | ||
|
|
||
| add_local_sources_ifdef(CONFIG_SOF_TELEMETRY2_THREAD_INFO sof telemetry2_thread_info.c) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,361 @@ | ||
| // SPDX-License-Identifier: BSD-3-Clause | ||
| // | ||
| // Copyright(c) 2024 Intel Corporation. | ||
|
|
||
| #include <zephyr/logging/log.h> | ||
| #include <zephyr/kernel.h> | ||
| #include <stdio.h> | ||
| #include <soc.h> | ||
| #include <adsp_debug_window.h> | ||
|
|
||
| #include "telemetry2_slot.h" | ||
|
|
||
| LOG_MODULE_REGISTER(thread_info); | ||
|
|
||
| #define THREAD_INFO_MAX_THREADS 16 | ||
|
|
||
| struct thread_info { | ||
| char name[14]; | ||
| uint8_t stack_usage; /* Relative stack usage U(0,8) fixed point value */ | ||
| uint8_t cpu_usage; /* Relative cpu usage U(0,8) fixed point value */ | ||
| } __packed; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. aligned ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need. The records are in a table that is core specific.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, but wont that need cache line alignment ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The table should cache aligned, but not single element, holding information for just one thread. One cache line can hold info for 4 threads. Currently the code reserves table of 16 |
||
|
|
||
| #define THREAD_INFO_VERSION_0_0 0 | ||
|
|
||
| #define THREAD_INFO_STATE_UNINITIALIZED 0 | ||
| #define THREAD_INFO_STATE_BEING_UPDATED 1 | ||
| #define THREAD_INFO_STATE_UPTODATE 2 | ||
|
|
||
| /* Core specific data, updated each round, including the thread table. */ | ||
| struct thread_info_core { | ||
| uint8_t state; /* indicates if core data is in consistent state */ | ||
| uint8_t counter; /* incremented every round */ | ||
| uint8_t load; /* Core's load U(0,8) fixed point value */ | ||
| uint8_t thread_count; | ||
| struct thread_info thread[THREAD_INFO_MAX_THREADS]; | ||
| } __packed __aligned(CONFIG_DCACHE_LINE_SIZE); | ||
|
|
||
| /* Thread info telemetry2 chunk header. Only initialized at first | ||
| * thread info thread start. Should be used to find the core specific | ||
| * sections where the thread info is when decoding. | ||
| */ | ||
| struct thread_info_chunk { | ||
| struct telemetry2_chunk_hdr hdr; | ||
| uint16_t core_count; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a thread can run on multiple cores?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the header of the whole thread info chunk, and the number indicates how many cores we have. The code keeps all the core separate all the time. Nothing else seems to work (DSP hangs). So there is a thread running on each core reporting the threads on that core. This header is just written once in the beginning and is used by the user-space client to find the core specific sections. I'll add some more comments. |
||
| uint16_t core_offset[CONFIG_MP_MAX_NUM_CPUS]; | ||
| struct thread_info_core __aligned(CONFIG_DCACHE_LINE_SIZE) core[CONFIG_MP_MAX_NUM_CPUS]; | ||
| } __packed; | ||
|
|
||
| #ifdef CONFIG_THREAD_RUNTIME_STATS | ||
| /* Data structure to store the cycle counter values from the previous | ||
| * round. The numbers are used to calculate what the load was on this | ||
| * round. | ||
| */ | ||
| static struct previous_counters { // Cached data from previous round | ||
| uint64_t active; // All execution cycles | ||
| uint64_t all; // All cycles including idle | ||
| struct thread_counters { | ||
| void *tid; // thread ID (the thread struct ptr) | ||
| uint64_t cycles; // cycle counter value | ||
| } threads[THREAD_INFO_MAX_THREADS]; // The max amount of threads we follow | ||
| } previous[CONFIG_MP_MAX_NUM_CPUS]; | ||
| #endif | ||
|
|
||
| /* Data structure to be passed down to thread_info_cb() by | ||
| * k_thread_foreach_current_cpu(). | ||
| */ | ||
| struct user_data { | ||
| struct thread_info_core *core_data; | ||
| int thread_count; | ||
| #ifdef CONFIG_THREAD_RUNTIME_STATS | ||
| bool stats_valid; | ||
| uint32_t all_cycles; | ||
| void *active_threads[THREAD_INFO_MAX_THREADS]; | ||
| struct previous_counters *previous; | ||
| #endif | ||
| }; | ||
|
|
||
| #ifdef CONFIG_THREAD_RUNTIME_STATS | ||
|
|
||
| /* Finds and/or updates cached cycle counter value for 'tid'-thread | ||
| * and calculates used execution cycles since previous round and | ||
| * returns it. | ||
| */ | ||
|
|
||
| static uint32_t get_cycles(void *tid, k_thread_runtime_stats_t *thread_stats, | ||
| struct user_data *ud, const char *name) | ||
|
|
||
| { | ||
| int i; | ||
|
|
||
| ud->active_threads[ud->thread_count] = tid; // Mark the thread as active | ||
| // look for cached value from previous round for 'tid'-thread | ||
jsarha marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) { | ||
| if (ud->previous->threads[i].tid == tid) { | ||
| // Calculate number cycles since previous round | ||
| uint32_t cycles = (uint32_t) (thread_stats->execution_cycles - | ||
| ud->previous->threads[i].cycles); | ||
|
|
||
| LOG_DBG("%p found at %d (%s %llu)", tid, i, | ||
| name, thread_stats->execution_cycles); | ||
| // updare cached value | ||
| ud->previous->threads[i].cycles = thread_stats->execution_cycles; | ||
| return cycles; | ||
| } | ||
| } | ||
|
|
||
| // If no cached value was found, look for an empty slot to store the recent value | ||
| for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) { | ||
| if (ud->previous->threads[i].tid == NULL) { | ||
| ud->previous->threads[i].tid = tid; | ||
| ud->previous->threads[i].cycles = thread_stats->execution_cycles; | ||
| LOG_DBG("%p placed at %d (%s %llu)", tid, i, | ||
| name, ud->previous->threads[i].cycles); | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| // If there is more than THREAD_INFO_MAX_THREADS threads in the system | ||
| if (i == ARRAY_SIZE(ud->previous->threads)) | ||
| LOG_INF("No place found for %s %p", name, tid); | ||
|
|
||
| // If there was no previous counter value to compare, return 0 cycles. | ||
| return 0; | ||
| } | ||
|
|
||
| static uint8_t thread_info_cpu_utilization(struct k_thread *thread, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like some of this file is RTOS specific and should go in Zephyr as Zephyr APIs.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its just refining the Zephyr thread stats cycle-counter values to produce load information. It requires cached values from previous round, and a generic Zephyr size implementation would become quite complex, requiring caller specific context allocation etc. If there is time for it, I can do it of course, but AFAIK we do not have it ATM. |
||
| struct user_data *ud, const char *name) | ||
| { | ||
| k_thread_runtime_stats_t thread_stats; | ||
| uint32_t cycles; | ||
| int ret; | ||
|
|
||
| if (!ud->stats_valid) | ||
| return 0; | ||
|
|
||
| if (k_thread_runtime_stats_get(thread, &thread_stats) != 0) | ||
| return 0; | ||
|
|
||
| cycles = get_cycles(thread, &thread_stats, ud, name); | ||
|
|
||
| LOG_DBG("thread %s %u / %u", name, cycles, ud->all_cycles); | ||
|
|
||
| return (uint8_t) ((255llu * cycles) / ud->all_cycles); | ||
| } | ||
| #else | ||
| static uint8_t thread_info_cpu_utilization(struct k_thread *thread, | ||
| struct user_data *ud) | ||
| { | ||
| return 0; | ||
| } | ||
| #endif | ||
|
|
||
| #ifdef CONFIG_THREAD_STACK_INFO | ||
| static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name) | ||
| { | ||
| size_t stack_size, stack_unused; | ||
| int ret; | ||
|
|
||
| stack_size = thread->stack_info.size; | ||
| ret = k_thread_stack_space_get(thread, &stack_unused); | ||
| if (ret) { | ||
| LOG_ERR(" %-20s: unable to get stack space (%d)", | ||
| name, ret); | ||
| stack_unused = 0; | ||
| } | ||
| return (UINT8_MAX * (stack_size - stack_unused)) / stack_size; | ||
| } | ||
| #else | ||
| static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name) | ||
| { | ||
| return 0; | ||
| } | ||
| #endif | ||
|
|
||
| static void thread_info_cb(const struct k_thread *cthread, void *user_data) | ||
| { | ||
| struct k_thread *thread = (struct k_thread *)cthread; | ||
| struct user_data *ud = user_data; | ||
| struct thread_info *thread_info; | ||
| const char *name; | ||
|
|
||
| if (ud->thread_count > ARRAY_SIZE(ud->core_data->thread)) { | ||
| LOG_ERR("Thread count %u exceeds the memory window size\n", | ||
| ud->thread_count++); | ||
| return; | ||
| } | ||
|
|
||
| thread_info = &ud->core_data->thread[ud->thread_count]; | ||
|
|
||
| name = k_thread_name_get((k_tid_t)thread); | ||
| if (!name || name[0] == '\0') { | ||
| snprintk(thread_info->name, sizeof(thread_info->name), "%p", | ||
| (void *)thread); | ||
| } else { | ||
| strncpy(thread_info->name, name, sizeof(thread_info->name)); | ||
| } | ||
|
|
||
| thread_info->stack_usage = thread_info_stack_level(thread, name); | ||
| thread_info->cpu_usage = thread_info_cpu_utilization(thread, ud, name); | ||
|
|
||
| LOG_DBG("core %u %s stack %u%% cpu %u%%", ud->core, | ||
| thread_info->name, thread_info->stack_usage * 100U / 255, | ||
| thread_info->cpu_usage * 100U / 255); | ||
|
|
||
| ud->thread_count++; | ||
| } | ||
|
|
||
| #ifdef CONFIG_THREAD_RUNTIME_STATS | ||
|
|
||
| /* Marks cached thread cycle counter entries of removed threads | ||
| * free. This also happens to threads pinned to other cpu than the | ||
| * primary. In the beginning they are listed on primary cpu, until the | ||
| * pinned cpu is started up and the thread is executed for the fist | ||
| * time and it moves to the cpu its pinned on. | ||
| */ | ||
| static void cleanup_old_thread_cycles(struct user_data *ud) | ||
| { | ||
| int i, j; | ||
|
|
||
| for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) { | ||
| bool found = false; | ||
|
|
||
| // This entry is already free, continue | ||
| if (ud->previous->threads[i].tid == NULL) | ||
| continue; | ||
|
|
||
| // Check if the thread is any more active | ||
| for (j = 0; j < ud->thread_count; j++) { | ||
| if (ud->active_threads[j] == ud->previous->threads[i].tid) { | ||
| found = true; | ||
| break; | ||
| } | ||
| } | ||
| // If the thead is not any more active, mark the entry free | ||
| if (!found) { | ||
| ud->previous->threads[i].tid = NULL; | ||
| ud->previous->threads[i].cycles = 0; | ||
| } | ||
| } | ||
| } | ||
| #else | ||
| static void cleanup_old_thread_cycles(struct user_data *ud) { } | ||
| #endif | ||
|
|
||
| static void thread_info_get(struct thread_info_core *core_data) | ||
| { | ||
| k_thread_runtime_stats_t core_stats; | ||
| struct user_data ud = { | ||
| .core_data = core_data, | ||
| .thread_count = 0, | ||
| #ifdef CONFIG_THREAD_RUNTIME_STATS | ||
| .previous = &previous[arch_curr_cpu()->id], | ||
| .active_threads = { NULL }, | ||
| #endif | ||
| }; | ||
| uint8_t load = 0; | ||
| #ifdef CONFIG_THREAD_RUNTIME_STATS | ||
| int ret = k_thread_runtime_stats_current_cpu_get(&core_stats); | ||
|
|
||
| if (ret == 0) { | ||
| uint32_t active_cycles = (uint32_t) (core_stats.total_cycles - | ||
| ud.previous->active); | ||
| uint32_t all_cycles = (uint32_t) (core_stats.execution_cycles - | ||
| ud.previous->all); | ||
|
|
||
| ud.stats_valid = true; | ||
| load = (uint8_t) ((255LLU * active_cycles) / all_cycles); | ||
| LOG_DBG("Core %u load %u / %u total %llu / %llu", arch_curr_cpu()->id, | ||
| active_cycles, all_cycles, | ||
| core_stats.total_cycles, core_stats.execution_cycles); | ||
| ud.previous->active = core_stats.total_cycles; | ||
| ud.previous->all = core_stats.execution_cycles; | ||
| ud.all_cycles = all_cycles; | ||
| } | ||
| #endif | ||
| core_data->state = THREAD_INFO_STATE_BEING_UPDATED; | ||
|
|
||
| core_data->load = load; | ||
| k_thread_foreach_current_cpu(thread_info_cb, &ud); | ||
|
|
||
| cleanup_old_thread_cycles(&ud); | ||
|
|
||
| core_data->counter++; | ||
| core_data->thread_count = ud.thread_count; | ||
| core_data->state = THREAD_INFO_STATE_UPTODATE; | ||
| } | ||
|
|
||
| static void thread_info_run(void *data, void *cnum, void *a) | ||
| { | ||
| struct thread_info_chunk *chunk = data; | ||
| int core = (int) cnum; | ||
| struct thread_info_core *core_data = &chunk->core[core]; | ||
|
|
||
| for (;;) { | ||
| thread_info_get(core_data); | ||
| k_sleep(K_SECONDS(CONFIG_SOF_TELEMETRY2_THREAD_INFO_INTERVAL)); | ||
| } | ||
| } | ||
|
|
||
| static struct thread_info_chunk *thread_info_init(void) | ||
| { | ||
| struct thread_info_chunk *chunk; | ||
| int i; | ||
|
|
||
| chunk = telemetry2_chunk_get(TELEMETRY2_ID_THREAD_INFO, | ||
| sizeof(*chunk)); | ||
| if (!chunk) | ||
| return NULL; | ||
|
|
||
| chunk->core_count = CONFIG_MP_MAX_NUM_CPUS; | ||
| for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) | ||
| chunk->core_offset[i] = offsetof(struct thread_info_chunk, core[i]); | ||
|
|
||
| return chunk; | ||
| } | ||
|
|
||
| static K_THREAD_STACK_ARRAY_DEFINE(info_thread_stacks, CONFIG_MP_MAX_NUM_CPUS, | ||
| 1024); | ||
| static struct k_thread info_thread[CONFIG_MP_MAX_NUM_CPUS]; | ||
|
|
||
| static int thread_info_start(void) | ||
| { | ||
| struct thread_info_chunk *chunk = thread_info_init(); | ||
| uint32_t i; | ||
|
|
||
| if (!chunk) | ||
| return 0; | ||
|
|
||
| for (i = 0; i < ARRAY_SIZE(info_thread); i++) { | ||
| char name[24]; | ||
| k_tid_t tid = NULL; | ||
| int ret; | ||
|
|
||
| tid = k_thread_create(&info_thread[i], info_thread_stacks[i], | ||
| 1024, | ||
| thread_info_run, | ||
| chunk, (void *) i, NULL, | ||
| K_LOWEST_APPLICATION_THREAD_PRIO, 0, K_FOREVER); | ||
| if (!tid) { | ||
| LOG_ERR("k_thread_create() failed for core %u", i); | ||
| continue; | ||
| } | ||
| ret = k_thread_cpu_pin(tid, i); | ||
| if (ret < 0) { | ||
| LOG_ERR("Pinning thread to code core %u", i); | ||
| k_thread_abort(tid); | ||
| continue; | ||
| } | ||
| snprintf(name, sizeof(name), "%u thread info", i); | ||
| ret = k_thread_name_set(tid, name); | ||
| if (ret < 0) | ||
| LOG_INF("k_thread_name_set failed: %d for %u", ret, i); | ||
|
|
||
| k_thread_start(tid); | ||
| LOG_DBG("Thread %p for core %u started", tid, i); | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| SYS_INIT(thread_info_start, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
where does the 14 come from?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a nice number to get 16-byte thread records. The rest of the thread name is truncated. Actually there are no names available unless CONFIG_THREAD_NAME=y . That should maybe be selected by SOF_TELEMETRY2_THREAD_INFO option.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is there a Zephyr define for this ? - can we reuse it ?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I could not find a single define setting max length for thread name. Instead there are two local ones, for presenting the name in:
subsys/net/lib/shell/sockets.c:#define THREAD_NAME_LEN 23
subsys/tracing/sysview/sysview_config.c:#define THREAD_NAME_LEN 20
I wonder if there is any maximum length. Its just a string that is not used in the Zephyr core code for anything, its there just for human observer. 14 makes the whole packed struct nicely 16 byte aligned, but I can of course make it what ever we want it to be. If the name is longer, the rest is truncated from Thread info output.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good find - @teburd fyi some mismatching lengths for strings.
@jsarha I assume this is just a const char * in thread.h ? if so, you could do the same here ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, its actually a Kconfig variable THREAD_MAX_NAME_LEN, and defaults to 32. However, with the static data structure I did not want to waste memory for maximum thread name length, when 14 is usually enough. I think that other name length definitions are there for similar reasons. In any case, the in the new circular buffer based approach I intend to make the name length dynamic, not static.