You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

612 lines
17 KiB
C

#include "profile.h"
#include "imap.h"
#include "icallpath.h"
#include "lobject.h"
#include "lstate.h"
#include <pthread.h>
#ifdef USE_GOOGLE_PROFILER
#include <gperftools/profiler.h>
#endif
#define MAX_CALL_SIZE 1024
#define MAX_CO_SIZE 1024
#define NANOSEC 1000000000
#define MICROSEC 1000000
/*
 * Registry key under which the active profile_context pointer is stored.
 * The key is used with lua_rawgetp / lua_rawsetp / lua_pushlightuserdata,
 * all of which identify it by ADDRESS. C leaves it unspecified whether
 * identical string literals share storage, so KEY expands to one named
 * object instead of a fresh literal at every use site.
 */
static char swt_profiler_key[] = "swt_profiler";
#define KEY swt_profiler_key
#ifdef USE_RDTSC
#include "rdtsc.h"
/*
 * Current timestamp as reported by rdtsc() (declared in rdtsc.h),
 * converted to uint64_t. Used when USE_RDTSC is defined.
 */
static inline uint64_t
gettime() {
    const uint64_t ticks = rdtsc();
    return ticks;
}
/*
 * Convert a gettime() reading to seconds by dividing by a fixed
 * 2,000,000,000 ticks per second.
 * NOTE(review): this presumably assumes a 2 GHz counter - confirm for the
 * CPUs this is deployed on.
 */
static inline double
realtime(uint64_t t) {
    const double ticks_per_second = 2000000000;
    return (double)t / ticks_per_second;
}
#else
/*
 * Current CLOCK_REALTIME reading in nanoseconds.
 *
 * The seconds field is no longer masked to 16 bits: with the mask the value
 * wrapped every 65536 seconds, so an interval that straddled the wrap made
 * "later - earlier" underflow (and tripped the ordering asserts in the hook).
 * The arithmetic is done in uint64_t so it does not depend on the width of
 * long.
 *
 * CLOCK_MONOTONIC and CLOCK_THREAD_CPUTIME_ID were alternatives left
 * commented out by the original author; the clock source is unchanged here.
 */
static inline uint64_t
gettime(void) {
    struct timespec ti;
    clock_gettime(CLOCK_REALTIME, &ti);
    return (uint64_t)ti.tv_sec * (uint64_t)NANOSEC + (uint64_t)ti.tv_nsec;
}
/* Convert a nanosecond gettime() value (or difference) to seconds. */
static inline double
realtime(uint64_t t) {
    const double nanos = (double)t;
    return nanos / NANOSEC;
}
#endif
struct callpath_node;
/*
 * One entry of a coroutine's shadow call stack. Entries are pushed by
 * _resolve_hook on call / tail-call events and popped on return events.
 */
struct call_frame {
/* Value identifying the callee as captured by the call hook
 * (far->i_ci->func, or lua_topointer() of the function). */
const void* point;
/* Key used to find this frame's node among the parent's children. Equals
 * `point` unless the callee is a Lua closure, in which case it is the
 * closure's prototype (cl->l.p). */
const void* prototype;
/* Call-tree node for this frame, as returned by get_frame_path(). */
struct icallpath_context* path;
/* True when the frame was entered through a LUA_HOOKTAILCALL event. */
bool tail;
/* gettime() value sampled when the call event was handled. */
uint64_t call_time;
/* gettime() value sampled when the return event was handled. */
uint64_t ret_time;
/* Time accumulated while the hook was observing other coroutines; it is
 * subtracted from the frame's total time on return. */
uint64_t sub_cost;
/* (return time - call_time) - sub_cost, stored on return. */
uint64_t real_cost;
/* Allocation bytes counted while the hook was observing other coroutines;
 * subtracted from the frame's allocation total on return. */
uint64_t alloc_co_cost;
/* profile_context.alloc_count sampled at call time. */
uint64_t alloc_start;
};
/*
 * Per-coroutine profiling state: the shadow call stack plus the bookkeeping
 * needed to discount time/allocations that happen while other coroutines
 * are being observed. Instances are allocated in _resolve_hook with room for
 * MAX_CALL_SIZE frames after the fixed part.
 */
struct call_state {
    /* Coroutine this state belongs to. */
    lua_State* co;
    /* gettime() value recorded when the hook switched to a different
     * coroutine; 0 when no switch is pending. */
    uint64_t leave_time;
    /* profile_context.alloc_count recorded at the same moment. */
    uint64_t leave_alloc;
    /* Number of frames currently in use in call_list. */
    int top;
    /* Trailing frame storage. Declared as a C99 flexible array member
     * instead of the non-standard zero-length array; sizeof(struct
     * call_state) still covers only the fixed part. */
    struct call_frame call_list[];
};
/*
 * State of one profiling session. A pointer to it is stored in the Lua
 * registry under KEY by _lstart and removed by _lstop.
 */
struct profile_context {
/* gettime() value taken in _lstart; the hook ignores events while this is 0. */
uint64_t start;
/* When false, _resolve_alloc does not add to alloc_count. Cleared while the
 * hook or a dump is running and set again afterwards. */
bool increment_alloc_count;
/* Running total of bytes by which allocations grew (see _resolve_alloc). */
uint64_t alloc_count;
/* Allocator function that was installed before profiling started. */
lua_Alloc last_alloc_f;
/* User data belonging to last_alloc_f. */
void* last_alloc_ud;
/* Map from coroutine address to its struct call_state. */
struct imap_context* cs_map;
/* Root of the call tree; created lazily by get_frame_path(). */
struct icallpath_context* callpath;
/* call_state of the coroutine that most recently triggered the hook. */
struct call_state* cur_cs;
};
/*
 * Payload stored in each call-tree node: identity of the function at this
 * position in the tree and the statistics accumulated for it.
 */
struct callpath_node {
/* Payload of the parent tree node; NULL for the root. */
struct callpath_node* parent;
/* Copy of the call_frame.prototype value that keys this node. */
const void* point;
/* Source string taken from lua_Debug (or "null" / "total").
 * NOTE(review): the pointer is stored, not copied - confirm its lifetime
 * covers the later dump. */
const char* source;
/* Function name taken from lua_Debug when USE_EXPORT_NAME is defined,
 * otherwise "null"; "total" for the root. Stored, not copied. */
const char* name;
/* linedefined of the function, or the caller's currentline for C functions. */
int line;
/* Distance from the root node (root is 0). */
int depth;
/* gettime() value of the first recorded return through this node. */
uint64_t ret_time;
/* Number of returns recorded for this node. */
uint64_t count;
/* Sum of call_frame.real_cost over all recorded returns (gettime units). */
uint64_t record_time;
/* Sum of allocation bytes attributed to this node. */
uint64_t alloc_count;
};
static struct callpath_node*
callpath_node_create() {
struct callpath_node* node = (struct callpath_node*)pmalloc(sizeof(*node));
node->parent = NULL;
node->point = NULL;
node->source = NULL;
node->name = NULL;
node->line = 0;
node->depth = 0;
node->ret_time = 0;
node->count = 0;
node->record_time = 0;
node->alloc_count = 0;
return node;
}
/*
 * Version counter bumped whenever a profile_context is created or freed;
 * a per-thread cache entry is only trusted while its stored version matches.
 */
uint64_t g_context_ver = 0;
/*
 * Guards the one-time creation of the thread-specific keys below.
 * POSIX requires a pthread_once_t to be initialised with PTHREAD_ONCE_INIT;
 * relying on zero-initialisation is undefined on platforms where that macro
 * is not all-zero.
 */
pthread_once_t g_cache_init = PTHREAD_ONCE_INIT;
/* Thread-specific slot: g_context_ver value at the time of caching. */
pthread_key_t g_cache_ver;
/* Thread-specific slot: lua_State the cached context was looked up for. */
pthread_key_t g_cache_co;
/* Thread-specific slot: the cached struct profile_context pointer. */
pthread_key_t g_cache_context;
/*
 * Create the three thread-specific keys used by the context cache, without
 * destructors. Return values of pthread_key_create() are not inspected.
 * Run through pthread_once() from luaopen_profile_c.
 */
void cache_init() {
    pthread_key_t* const slots[] = { &g_cache_ver, &g_cache_co, &g_cache_context };
    for (size_t i = 0; i < sizeof(slots) / sizeof(slots[0]); i++) {
        pthread_key_create(slots[i], NULL);
    }
}
/*
 * Return the profile_context cached for the calling thread, or NULL when the
 * cache is stale (stored version differs from g_context_ver) or was filled
 * for a different lua_State than L.
 */
static struct profile_context* get_cache_context(lua_State* L) {
    const uint64_t cached_ver = (uint64_t)((uintptr_t)pthread_getspecific(g_cache_ver));
    if (cached_ver == g_context_ver) {
        lua_State* cached_co = (lua_State*)pthread_getspecific(g_cache_co);
        if (cached_co == L) {
            return (struct profile_context*)pthread_getspecific(g_cache_context);
        }
    }
    return NULL;
}
/*
 * Remember `context` as the profile_context for lua_State `co` in the calling
 * thread's cache, tagged with the current g_context_ver.
 *
 * The version is stored in a pointer-sized slot, so it is converted through
 * uintptr_t (mirroring the read side in get_cache_context) instead of being
 * cast from a 64-bit integer straight to a pointer.
 */
void set_cache_context(lua_State* co, struct profile_context* context) {
    const void* ver_as_ptr = (const void*)(uintptr_t)g_context_ver;
    pthread_setspecific(g_cache_ver, ver_as_ptr);
    pthread_setspecific(g_cache_co, (const void*)co);
    pthread_setspecific(g_cache_context, (const void*)context);
}
/*
 * Allocate a profile_context with an empty coroutine map, no call tree, no
 * saved allocator and allocation counting switched off. Bumps
 * g_context_ver so that per-thread cache entries are no longer trusted.
 * The result of pmalloc() is used without a NULL check.
 */
static struct profile_context *
profile_create() {
    struct profile_context* ctx = (struct profile_context*)pmalloc(sizeof(struct profile_context));
    ctx->start = 0;
    ctx->cs_map = imap_create();
    ctx->callpath = NULL;
    ctx->cur_cs = NULL;
    ctx->increment_alloc_count = false;
    ctx->alloc_count = 0;
    ctx->last_alloc_f = NULL;
    ctx->last_alloc_ud = NULL;
    g_context_ver += 1;
    return ctx;
}
/*
 * imap_dump() callback used by profile_free(): releases one call_state with
 * pfree(). The key and user-data arguments are not used.
 */
static void
_ob_free_call_state(uint64_t key, void* value, void* ud) {
    (void)key;
    (void)ud;
    pfree(value);
}
/*
 * Destroy a profile_context: bump g_context_ver (so cached pointers to it
 * stop being returned), free the call tree if one was built, free every
 * call_state held in the coroutine map, the map itself, and the context.
 */
static void
profile_free(struct profile_context* context) {
    g_context_ver += 1;

    if (context->callpath != NULL) {
        icallpath_free(context->callpath);
        context->callpath = NULL;
    }

    struct imap_context* states = context->cs_map;
    imap_dump(states, _ob_free_call_state, NULL);
    imap_free(states);

    pfree(context);
}
/*
 * Reserve the next slot of the shadow call stack and return it.
 * The capacity check is an assert only, so it is compiled out when NDEBUG is
 * defined and the slot beyond MAX_CALL_SIZE would then be handed out.
 */
static inline struct call_frame *
push_callframe(struct call_state* cs) {
    const bool full = cs->top >= MAX_CALL_SIZE;
    if (full) {
        assert(false);
    }
    struct call_frame* slot = &cs->call_list[cs->top];
    cs->top += 1;
    return slot;
}
/*
 * Remove the top entry of the shadow call stack and return a pointer to it
 * (the storage stays valid inside call_list). An empty stack is only caught
 * by an assert.
 */
static inline struct call_frame *
pop_callframe(struct call_state* cs) {
    const bool empty = cs->top <= 0;
    if (empty) {
        assert(false);
    }
    cs->top -= 1;
    return &cs->call_list[cs->top];
}
/* Peek at the top entry of the shadow call stack; NULL when it is empty. */
static inline struct call_frame *
cur_callframe(struct call_state* cs) {
    const int depth = cs->top;
    return depth > 0 ? &cs->call_list[depth - 1] : NULL;
}
/*
 * Find the active profile_context for L.
 * The per-thread cache is consulted first; on a miss the pointer is read
 * from the registry entry stored under KEY and, when present, cached.
 * Returns NULL when no context is registered.
 */
static inline struct profile_context *
_get_profile(lua_State* L) {
    struct profile_context* cached = get_cache_context(L);
    if (cached != NULL) {
        return cached;
    }

    lua_rawgetp(L, LUA_REGISTRYINDEX, KEY);
    struct profile_context* stored = (struct profile_context*)lua_touserdata(L, -1);
    lua_pop(L, 1);

    if (stored == NULL) {
        return NULL;
    }
    set_cache_context(L, stored);
    return stored;
}
/*
 * Return the call-tree node for `frame`, creating it when needed.
 *
 * context      - session state; its root node ("total") is created on first use.
 * co           - coroutine the hook is running on; used for lua_getinfo/lua_getstack.
 * far          - activation record passed to the hook; filled by lua_getinfo here.
 * pre_callpath - node of the caller's frame, or NULL to attach under the root.
 * frame        - frame being entered; frame->prototype is the child key.
 *
 * The first time a node is visited its name/source/line are resolved. For C
 * functions the source and line are taken from the nearest non-C frame found
 * by walking up the stack from level 1. The name is only queried when
 * USE_EXPORT_NAME is defined; otherwise it is recorded as "null".
 * NOTE(review): name/source pointers from lua_Debug are stored, not copied.
 */
static struct icallpath_context*
get_frame_path(struct profile_context* context, lua_State* co, lua_Debug* far, struct icallpath_context* pre_callpath, struct call_frame* frame) {
    if (context->callpath == NULL) {
        struct callpath_node* root = callpath_node_create();
        root->name = "total";
        root->source = root->name;
        context->callpath = icallpath_create(0, root);
    }

    struct icallpath_context* parent = pre_callpath ? pre_callpath : context->callpath;
    const uint64_t key = (uint64_t)((uintptr_t)frame->prototype);

    struct icallpath_context* path = icallpath_get_child(parent, key);
    if (path == NULL) {
        struct callpath_node* parent_node = (struct callpath_node*)icallpath_getvalue(parent);
        struct callpath_node* fresh = callpath_node_create();
        fresh->parent = parent_node;
        fresh->point = frame->prototype;
        fresh->depth = parent_node->depth + 1;
        fresh->ret_time = 0;
        fresh->record_time = 0;
        fresh->count = 0;
        fresh->alloc_count = 0;
        path = icallpath_add_child(parent, key, fresh);
    }

    struct callpath_node* node = (struct callpath_node*)icallpath_getvalue(path);
    if (node->name != NULL) {
        /* Already described on an earlier visit. */
        return path;
    }

    const char* name = NULL;
#ifdef USE_EXPORT_NAME
    lua_getinfo(co, "nSl", far);
    name = far->name;
#else
    lua_getinfo(co, "Sl", far);
#endif
    int line = far->linedefined;
    const char* source = far->source;

    if (far->what[0] == 'C') {
        /* Describe a C function by the location of its nearest non-C caller. */
        lua_Debug caller;
        for (int level = 1; lua_getstack(co, level, &caller); level++) {
            lua_getinfo(co, "Sl", &caller);
            if (caller.what[0] != 'C') {
                line = caller.currentline;
                source = caller.source;
                break;
            }
        }
    }

    node->name = name ? name : "null";
    node->source = source ? source : "null";
    node->line = line;
    return path;
}
/*
 * lua_Alloc wrapper installed by _lstart. Forwards every request to the
 * allocator that was active before profiling started and, while
 * context->increment_alloc_count is set, adds the number of bytes by which a
 * block grew to context->alloc_count.
 *
 * ud    - the struct profile_context of the session.
 * ptr   - block being resized/freed, or NULL for a new allocation.
 * osize - previous size of ptr; ignored for accounting when ptr is NULL.
 * nsize - requested size.
 *
 * Growth is only counted after the underlying allocator succeeded, so a
 * failed request no longer inflates the statistics.
 */
static void*
_resolve_alloc(void *ud, void *ptr, size_t osize, size_t nsize) {
    struct profile_context* context = (struct profile_context*)ud;
    void* p = context->last_alloc_f(context->last_alloc_ud, ptr, osize, nsize);
    size_t old = ptr == NULL ? 0 : osize;
    if (p != NULL && nsize > old && context->increment_alloc_count) {
        context->alloc_count += (nsize - old);
    }
    return p;
}
/*
 * Debug hook installed with LUA_MASKCALL | LUA_MASKRET.
 *
 * On call / tail-call events a call_frame is pushed on the coroutine's shadow
 * stack and linked to its call-tree node. On return events the top frame (and
 * any frames below it that were entered by tail calls) is popped and its time
 * and allocation figures are added to the node.
 *
 * When the hook fires on a different coroutine than the previous event, the
 * previous coroutine's leave_time/leave_alloc are stamped; when that
 * coroutine is seen again, the elapsed time and allocations are added to the
 * sub_cost/alloc_co_cost of every frame it still has open, so they are not
 * charged to those frames.
 *
 * Allocation counting is suspended while the hook itself runs.
 *
 * Changes from the previous version:
 *  - A hook that fires when no profile_context is registered (for example a
 *    coroutine that was not reached when profiling stopped) removes itself
 *    and returns instead of dereferencing NULL.
 *  - The function value pushed by lua_getinfo(L, "f", ...) is popped again.
 *
 * NOTE(review): the shadow stack holds MAX_CALL_SIZE frames and overflow is
 * only guarded by an assert in push_callframe().
 */
static void
_resolve_hook(lua_State* L, lua_Debug* far) {
    struct profile_context* context = _get_profile(L);
    if (context == NULL) {
        /* Stale hook: nothing to record into, so uninstall it. */
        lua_sethook(L, NULL, 0, 0);
        return;
    }
    if (context->start == 0) {
        return;
    }
    uint64_t cur_time = gettime();
    context->increment_alloc_count = false;
    int event = far->event;

    /* Locate (or create) the state of the coroutine that triggered the hook. */
    struct call_state* cs = context->cur_cs;
    if (!context->cur_cs || context->cur_cs->co != L) {
        uint64_t key = (uint64_t)((uintptr_t)L);
        cs = imap_query(context->cs_map, key);
        if (cs == NULL) {
            cs = (struct call_state*)pmalloc(sizeof(struct call_state) + sizeof(struct call_frame)*MAX_CALL_SIZE);
            cs->co = L;
            cs->top = 0;
            cs->leave_time = 0;
            cs->leave_alloc = 0;
            imap_set(context->cs_map, key, cs);
        }
        if (context->cur_cs) {
            /* Stamp the coroutine we are switching away from. */
            context->cur_cs->leave_time = cur_time;
            context->cur_cs->leave_alloc = context->alloc_count;
        }
        context->cur_cs = cs;
    }

    /* Coming back to a coroutine: discount what happened while it was away. */
    if (cs->leave_time > 0) {
        assert(cur_time >= cs->leave_time);
        uint64_t co_cost = cur_time - cs->leave_time;
        uint64_t co_alloc = context->alloc_count - cs->leave_alloc;
        int i = 0;
        for (; i < cs->top; i++) {
            cs->call_list[i].sub_cost += co_cost;
            cs->call_list[i].alloc_co_cost += co_alloc;
        }
        cs->leave_time = 0;
        cs->leave_alloc = 0;
    }
    assert(cs->co == L);

    if (event == LUA_HOOKCALL || event == LUA_HOOKTAILCALL) {
        const void* point = NULL;
        if (far->i_ci && far->i_ci->func) {
            point = far->i_ci->func;
        } else {
            lua_getinfo(L, "f", far);
            point = lua_topointer(L, -1);
            lua_pop(L, 1); /* drop the function pushed by the "f" option */
        }
        struct icallpath_context* pre_callpath = NULL;
        struct call_frame* pre_frame = cur_callframe(cs);
        if (pre_frame) {
            pre_callpath = pre_frame->path;
        }
        struct call_frame* frame = push_callframe(cs);
        frame->point = point;
        frame->tail = event == LUA_HOOKTAILCALL;
        frame->sub_cost = 0;
        frame->call_time = cur_time;
        frame->alloc_co_cost = 0;
        frame->alloc_start = context->alloc_count;
        frame->prototype = point;
        /* Lua closures are keyed by their prototype rather than by `point`. */
        if (far->i_ci && ttisclosure(s2v(far->i_ci->func))) {
            Closure *cl = clvalue(s2v(far->i_ci->func));
            if (cl && cl->c.tt == LUA_VLCL) {
                frame->prototype = cl->l.p;
            }
        }
        frame->path = get_frame_path(context, L, far, pre_callpath, frame);
    } else if (event == LUA_HOOKRET) {
        int len = cs->top;
        if (len <= 0) {
            context->increment_alloc_count = true;
            return;
        }
        bool tail_call = false;
        do {
            struct call_frame* cur_frame = pop_callframe(cs);
            struct callpath_node* cur_path = (struct callpath_node*)icallpath_getvalue(cur_frame->path);
            uint64_t total_cost = cur_time - cur_frame->call_time;
            uint64_t real_cost = total_cost - cur_frame->sub_cost;
            uint64_t alloc_count = context->alloc_count - cur_frame->alloc_start - cur_frame->alloc_co_cost;
            assert(context->alloc_count >= (cur_frame->alloc_start + cur_frame->alloc_co_cost));
            assert(cur_time >= cur_frame->call_time && total_cost >= cur_frame->sub_cost);
            cur_frame->ret_time = cur_time;
            cur_frame->real_cost = real_cost;
            /* Only the first return time of a node is kept. */
            cur_path->ret_time = cur_path->ret_time == 0 ? cur_time : cur_path->ret_time;
            cur_path->record_time += real_cost;
            cur_path->count++;
            cur_path->alloc_count += alloc_count;
            /* A frame entered by a tail call has no return event of its own
             * for the frame it replaced, so keep unwinding while frames remain. */
            struct call_frame* pre_frame = cur_callframe(cs);
            tail_call = pre_frame ? cur_frame->tail : false;
        } while (tail_call);
    }
    context->increment_alloc_count = true;
}
/*
 * Accumulator passed down while the call tree is converted to Lua tables.
 * Each _dump_call_path() call adds its node's figures to the instance it
 * receives, so a parent can compare its own numbers with its children's sum.
 */
struct dump_call_path_arg {
/* Lua state the tables are built on. */
lua_State* L;
/* Sum of the "value" figures (microseconds) of the nodes dumped so far. */
uint64_t record_time;
/* Sum of the "count" figures of the nodes dumped so far. */
uint64_t count;
/* Last array index used in the "children" table being filled. */
uint64_t index;
/* Sum of the "alloc_count" figures of the nodes dumped so far. */
uint64_t alloc_count;
};
static void _dump_call_path(struct icallpath_context* path, struct dump_call_path_arg* arg);

/*
 * icallpath_dump_children() callback: dumps one child node (leaving its table
 * on top of the stack) and appends it to the "children" table just below, at
 * the next array index. `ud` is the parent's child accumulator; `key` is
 * not used.
 */
static void _dump_call_path_child(uint64_t key, void* value, void* ud) {
    struct dump_call_path_arg* siblings = (struct dump_call_path_arg*)ud;
    struct icallpath_context* child = (struct icallpath_context*)value;
    (void)key;

    _dump_call_path(child, siblings);
    siblings->index += 1;
    lua_seti(siblings->L, -2, siblings->index);
}
static void _dump_call_path(struct icallpath_context* path, struct dump_call_path_arg* arg) {
lua_checkstack(arg->L, 3);
lua_newtable(arg->L);
struct dump_call_path_arg child_arg;
child_arg.L = arg->L;
child_arg.record_time = 0;
child_arg.count = 0;
child_arg.index = 0;
child_arg.alloc_count = 0;
if (icallpath_children_size(path) > 0) {
lua_newtable(arg->L);
icallpath_dump_children(path, _dump_call_path_child, &child_arg);
lua_setfield(arg->L, -2, "children");
}
struct callpath_node* node = (struct callpath_node*)icallpath_getvalue(path);
uint64_t alloc_count = node->alloc_count > child_arg.alloc_count ? node->alloc_count : child_arg.alloc_count;
uint64_t count = node->count > child_arg.count ? node->count : child_arg.count;
uint64_t rt = realtime(node->record_time) * MICROSEC;
uint64_t record_time = rt > child_arg.record_time ? rt : child_arg.record_time;
arg->record_time += record_time;
arg->count += count;
arg->alloc_count += alloc_count;
char name[512] = {0};
snprintf(name, sizeof(name)-1, "%s %s:%d", node->name ? node->name : "", node->source ? node->source : "", node->line);
lua_pushstring(arg->L, name);
lua_setfield(arg->L, -2, "name");
lua_pushinteger(arg->L, count);
lua_setfield(arg->L, -2, "count");
lua_pushinteger(arg->L, record_time);
lua_setfield(arg->L, -2, "value");
lua_pushinteger(arg->L, node->ret_time);
lua_setfield(arg->L, -2, "rettime");
lua_pushinteger(arg->L, alloc_count);
lua_setfield(arg->L, -2, "alloc_count");
}
/*
 * Push a Lua table describing the whole call tree rooted at `path` onto L.
 * The totals accumulated for the root are discarded.
 */
static void dump_call_path(lua_State* L, struct icallpath_context* path) {
    struct dump_call_path_arg totals = {0};
    totals.L = L;
    _dump_call_path(path, &totals);
}
/*
 * Collect the main thread plus every thread object found on the global
 * state's allgc list into `result`, stopping once `maxsize` entries have been
 * stored. Returns the number of entries written.
 *
 * A non-positive `maxsize` now yields 0 instead of writing result[0].
 * Threads beyond `maxsize` are silently left out.
 */
static int
get_all_coroutines(lua_State* L, lua_State** result, int maxsize) {
    if (maxsize <= 0) {
        return 0;
    }
    int count = 0;
    struct global_State* lG = L->l_G;
    result[count++] = lG->mainthread;
    for (struct GCObject* obj = lG->allgc; obj != NULL && count < maxsize; obj = obj->next) {
        if (obj->tt == LUA_TTHREAD) {
            result[count++] = gco2th(obj);
        }
    }
    return count;
}
/*
 * Lua: start()
 * Begin a profiling session unless one is already registered. Creates the
 * context, stores it in the registry under KEY, records the start time,
 * wraps the current allocator with _resolve_alloc and installs the
 * call/return hook on every coroutine returned by get_all_coroutines()
 * (at most MAX_CO_SIZE). Returns nothing to Lua.
 */
static int
_lstart(lua_State* L) {
    if (_get_profile(L) != NULL) {
        return 0;
    }
#ifdef USE_GOOGLE_PROFILER
    printf("\n==============nProfilerStart================\n");
    ProfilerStart("log/swt.prof");
#endif
    /* Register the new context. */
    struct profile_context* context = profile_create();
    lua_pushlightuserdata(L, context);
    lua_rawsetp(L, LUA_REGISTRYINDEX, KEY);
    context->start = gettime();

    /* Chain our allocator in front of the existing one. */
    context->last_alloc_f = lua_getallocf(L, &context->last_alloc_ud);
    lua_setallocf(L, _resolve_alloc, (void*)context);

    lua_State* states[MAX_CO_SIZE] = {0};
    int remaining = get_all_coroutines(L, states, MAX_CO_SIZE);
    while (remaining-- > 0) {
        lua_sethook(states[remaining], _resolve_hook, LUA_MASKCALL | LUA_MASKRET, 0);
    }

    context->increment_alloc_count = true;
    return 0;
}
/*
 * Lua: stop()
 * End the profiling session, if any: stop allocation counting, restore the
 * previous allocator (only when ours is still the installed one), remove the
 * hook from every coroutine returned by get_all_coroutines(), clear the
 * registry entry and free the context. Returns nothing to Lua.
 *
 * The registry entry is cleared with lua_rawsetp(), matching how _lstart
 * writes it and _get_profile reads it, and it is cleared before the context
 * is freed so the registry never refers to released memory.
 */
static int
_lstop(lua_State* L) {
    struct profile_context* context = _get_profile(L);
    if (!context) {
        return 0;
    }
    context->increment_alloc_count = false;

    void* current_ud = NULL;
    lua_getallocf(L, &current_ud);
    if (current_ud == context) {
        lua_setallocf(L, context->last_alloc_f, context->last_alloc_ud);
    }

    lua_State* states[MAX_CO_SIZE] = {0};
    int i = get_all_coroutines(L, states, MAX_CO_SIZE);
    for (i = i - 1; i >= 0; i--) {
        lua_sethook(states[i], NULL, 0, 0);
    }

    lua_pushnil(L);
    lua_rawsetp(L, LUA_REGISTRYINDEX, KEY);
    profile_free(context);
#ifdef USE_GOOGLE_PROFILER
    printf("\n==============ProfilerStop================\n");
    ProfilerStop();
#endif
    return 0;
}
/*
 * Lua: mark([thread]) -> boolean
 * Install the profiling hook on the given coroutine (the caller when
 * argument 1 is not a thread), provided a session is running. Returns
 * whether the session's start time is set; returns nothing when no context
 * exists.
 */
static int
_lmark(lua_State* L) {
    struct profile_context* context = _get_profile(L);
    if (context == NULL) {
        return 0;
    }
    lua_State* target = lua_tothread(L, 1);
    if (target == NULL) {
        target = L;
    }
    const bool started = context->start != 0;
    if (started) {
        lua_sethook(target, _resolve_hook, LUA_MASKCALL | LUA_MASKRET, 0);
    }
    lua_pushboolean(L, started);
    return 1;
}
/*
 * Lua: unmark([thread])
 * Remove the hook from the given coroutine (the caller when argument 1 is
 * not a thread). Does nothing when no context exists. Returns nothing.
 */
static int
_lunmark(lua_State* L) {
    if (_get_profile(L) == NULL) {
        return 0;
    }
    lua_State* target = lua_tothread(L, 1);
    lua_sethook(target != NULL ? target : L, NULL, 0, 0);
    return 0;
}
/*
 * Lua: dump() -> elapsed_microseconds, tree
 * Returns the time since the session started and the call tree as nested
 * tables. Returns nothing when there is no context or no call has been
 * recorded yet. Allocation counting is suspended while the tables are built.
 */
static int
_ldump(lua_State* L) {
    struct profile_context* context = _get_profile(L);
    if (context == NULL || context->callpath == NULL) {
        return 0;
    }
    context->increment_alloc_count = false;
    uint64_t elapsed_us = realtime(gettime() - context->start) * MICROSEC;
    lua_pushinteger(L, elapsed_us);
    dump_call_path(L, context->callpath);
    context->increment_alloc_count = true;
    return 2;
}
/*
 * Module entry point for require "profile.c".
 * Creates the thread-specific cache keys once per process and returns a
 * table with the functions start, stop, mark, unmark and dump.
 */
int
luaopen_profile_c(lua_State* L) {
    pthread_once(&g_cache_init, cache_init);
    luaL_checkversion(L);
    luaL_Reg exports[] = {
        {"start", _lstart},
        {"stop", _lstop},
        {"mark", _lmark},
        {"unmark", _lunmark},
        {"dump", _ldump},
        {NULL, NULL},
    };
    luaL_newlib(L, exports);
    return 1;
}