bpftrace系统分析脚本实战

最近ebpf比较火,作为运维,大家应该比较关注ebpf对系统问题分析定位的能力。下面分享一些我实际在用的bpftrace脚本案例。

案例一,重要文件被删除

有时候线上某个重要文件不翼而飞,我们需要知道是谁删的,可以利用相关跟踪点找到罪魁祸首。

  • 通过静态跟踪点跟踪unlink相关的跟踪点(tracepoint)
#!/usr/local/bin/bpftrace

/*
 * Report every unlinkat() syscall entry together with the identity of the
 * caller, so that the process deleting a file can be found.
 *
 * Arguments exposed by tracepoint:syscalls:sys_enter_unlinkat:
 *     int __syscall_nr
 *     int dfd
 *     const char * pathname
 *     int flag
 */

// Print the column header once, when the script starts.
BEGIN
{
    printf("%-20s %-10s %-10s %-10s %-20s %s\n",
           "TIME", "USERNAME", "UID", "PID", "COMM", "FNAME");
}

// One row per unlinkat() entry. The path is printed exactly as it was passed
// to the syscall.
t:syscalls:sys_enter_unlinkat
{
    $path = str(args->pathname);
    printf("%-20s %-10s %-10u %-10u %-20s %s\n",
           strftime("%F-%T", nsecs), username, uid, pid, comm, $path);
}

# 执行结果
$ sudo bpftrace rm.bt
Attaching 2 probes...
TIME USERNAME UID PID COMM FNAME
2021-02-26-18:35:02 obbobbxy 1008 130183 rm abc
2021-02-26-18:35:02 obbobbxy 1008 130183 rm 123
2021-02-26-18:35:26 obbobbxy 1008 130209 rm 123
2021-02-26-18:35:34 obbobbxy 1008 130212 rm 123
2021-02-26-18:35:38 root 0 130508 rm /var/rudder/cfengine-community/state/cf_lock.lmdb
2021-02-26-18:35:38 root 0 130509 rm /var/rudder/cfengine-community/state/cf_lock.lmdb.lock
2021-02-26-18:35:40 obbobbxy 1008 130704 rm abc
2021-02-26-18:35:40 obbobbxy 1008 130704 rm 123
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent.extra
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent.ssh
2021-02-26-18:36:05 root 0 634 systemd-logind S.dirmngr
2021-02-26-18:36:05 root 0 634 systemd-logind S.gpg-agent.browser
2021-02-26-18:36:05 root 0 634 systemd-logind gnupg
2021-02-26-18:36:05 root 0 634 systemd-logind private
2021-02-26-18:36:05 root 0 634 systemd-logind notify
2021-02-26-18:36:05 root 0 634 systemd-logind transient
2021-02-26-18:36:05 root 0 634 systemd-logind systemd
  • 通过动态跟踪点跟踪unlink相关的跟踪点(kprobe)
#!/usr/local/bin/bpftrace

// header file path: /usr/src/linux-headers-$(uname -r | sed 's/-amd64//')-common/include
#include <linux/dcache.h>

/*
 * watch_rm.bt [duration_seconds [max_records]]
 *
 * Prints one row for every file removal (vfs_unlink) and directory removal
 * (vfs_rmdir) seen while the script runs.
 *
 *   $1  run time in seconds; 10 is used when it is omitted or 0
 *   $2  maximum number of rows to record; omitted or 0 means "no limit"
 *
 * References:
 *   https://www.kernel.org/doc/htmldocs/filesystems/API-vfs-unlink.html :
 *       int vfs_unlink ( struct inode * dir, struct dentry * dentry, struct inode ** delegated_inode);
 *   https://docs.huihoo.com/doxygen/linux/kernel/3.7/structdentry.html :
 *       struct dentry -> struct qstr d_name
 *   https://docs.huihoo.com/doxygen/linux/kernel/3.7/structqstr.html :
 *       struct qstr d_name -> const unsigned char * name
 *   filename: ((struct dentry *)arg1)->d_name.name
 *
 * NOTE(review): arg1 is the dentry only for the signature quoted above.
 * Newer kernels appear to have added a leading argument to vfs_unlink and
 * vfs_rmdir, which would make the dentry arg2 - verify against the running
 * kernel before use.
 */

BEGIN
{
    @t = 0;                 // seconds elapsed
    @c = 0;                 // rows recorded so far
    @duration = $1;
    @maxcount = $2;
    if (@duration == 0) {
        @duration = 10;
    }
    printf("%-20s %-5s %-10s %-10s %-10s %-20s %s\n","TIME", "TYPE", "USERNAME", "UID","PID","COMM", "FNAME");
}

// The predicate enforces the record limit at the moment an event arrives, so
// no further rows are printed once @maxcount rows have been recorded (a check
// that only runs once per second would let a burst of deletions through).
kprobe:vfs_unlink
/@maxcount == 0 || @c < @maxcount/
{
    printf("%-20s %-5s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), "file", username, uid, pid, comm, str(((struct dentry *)arg1)->d_name.name));
    @c++;
}

kprobe:vfs_rmdir
/@maxcount == 0 || @c < @maxcount/
{
    printf("%-20s %-5s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), "dir", username, uid, pid, comm, str(((struct dentry *)arg1)->d_name.name));
    @c++;
}

// Once per second: stop when the time budget is used up or the record limit
// has been reached. All bookkeeping maps are cleared on both paths so that
// none of them is dumped when bpftrace exits.
interval:s:1
{
    @t++;
    if ( @t >= @duration || ( @maxcount != 0 && @c >= @maxcount ) ) {
        clear(@t);
        clear(@c);
        clear(@duration);
        clear(@maxcount);
        exit();
    }
}
# 脚本默认运行10s,可以通过第一个位置参数传入sudo ./watch_rm.bt 30,结束后会打印运行时间内所有删除文件/目录的进程,第二个参数可以指定最多获取记录数
# sudo ./watch_rm.bt 5 30, 表示脚本最多运行5s,最多获取30条记录,如果不指定第二个参数,默认就按超时退出;
$ sudo ./watch_rm.bt
Attaching 4 probes...
TIME TYPE USERNAME UID PID COMM FNAME
2021-02-23-22:43:59 dir obbobbxy 1008 187638 rm dir1
2021-02-23-22:43:59 dir obbobbxy 1008 187639 rm dir2
2021-02-23-22:43:59 dir obbobbxy 1008 187640 rm dir3
2021-02-23-22:43:59 file obbobbxy 1008 187641 rm file1
2021-02-23-22:43:59 file obbobbxy 1008 187642 rm file2
2021-02-23-22:43:59 file obbobbxy 1008 187643 rm file3
2021-02-23-22:43:59 file obbobbxy 1008 187644 rm file4
2021-02-23-22:43:59 file obbobbxy 1008 187645 rm file5
2021-02-23-22:43:59 file obbobbxy 1008 187646 rm file6
2021-02-23-22:43:59 file obbobbxy 1008 187647 rm file7
2021-02-23-22:43:59 file obbobbxy 1008 187648 rm file8
2021-02-23-22:43:59 file obbobbxy 1008 187649 rm file9

案例二,进程top流量

#!/usr/local/bin/bpftrace

#include <linux/fs.h>
#include <net/sock.h>

/*
 * Per-command histograms of the values returned by sock_recvmsg() and
 * sock_sendmsg(). The histograms are printed when the script exits.
 */

// On entry to either function, store its first argument under the thread id.
// Nothing in this script reads the stored value back; it is only deleted
// again in the matching return probe.
kprobe:sock_recvmsg,
kprobe:sock_sendmsg
{
    @socket[tid] = arg0;
}

// Receive path: drop the per-thread entry, then record the return value.
// Return values of 0x7fffffff and above are skipped (presumably negative
// error codes seen as large unsigned numbers).
kretprobe:sock_recvmsg
{
    delete(@socket[tid]);
    if (retval < 0x7fffffff) {
        @read_bytes[comm] = hist(retval);
    }
}

// Send path: same handling as the receive path, separate histogram.
kretprobe:sock_sendmsg
{
    delete(@socket[tid]);
    if (retval < 0x7fffffff) {
        @write_bytes[comm] = hist(retval);
    }
}

// Empty the bookkeeping map so that only the histograms are printed at exit.
END
{
    clear(@socket);
}

# 以直方图形式输出进程socket读写字节数,如果需要,则可以加上pid/sport/dport
$ sudo bpftrace socksize.bt
Attaching 5 probes...
^C

@read_bytes[ospfd]:
[64, 128) 1 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|

@read_bytes[sshd]:
[32, 64) 1 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|

@read_bytes[java]:
[8, 16) 2 |@@@@@@@@@@@ |
[16, 32) 9 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[32, 64) 5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[64, 128) 2 |@@@@@@@@@@@ |

@read_bytes[nginx]:
[0] 107 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[1] 1 | |
[2, 4) 0 | |
[4, 8) 0 | |
[8, 16) 0 | |
[16, 32) 0 | |
[32, 64) 0 | |
[64, 128) 20 |@@@@@@@@@ |
[128, 256) 113 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[256, 512) 103 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[512, 1K) 35 |@@@@@@@@@@@@@@@@ |
[1K, 2K) 5 |@@ |


# 使用bcc的tcptop工具也可以做到
$ sudo /usr/share/bcc/tools/tcptop 1 1
Tracing... Output every 1 secs. Hit Ctrl-C to end
23:37:39 loadavg: 0.49 0.48 0.35 2/831 85332

PID COMM LADDR RADDR RX_KB TX_KB
82242 nginx 192.168.3.185:52456 192.168.9.57:7130 2 4
82242 nginx 192.168.3.185:47782 192.168.7.66:7130 2 4
82242 nginx 192.168.3.185:43064 192.168.1.51:7130 2 4

案例三,redis 热key发现

# 查看redis源码,可以跟踪lookupkey相关的函数
#!/usr/local/bin/bpftrace

/*
 * redis-uprobe.bt [duration_seconds]
 *
 * Counts how often each key is looked up by /usr/bin/redis-server and prints
 * the 10 most frequent keys when the script ends.
 *
 *   $1  run time in seconds; 10 is used when it is omitted or not positive
 *
 * https://github.com/redis/redis/blob/8f9958dc24fa5992d3d10f6b9caf999e1beee4e5/src/server.h#L666:
 *   typedef struct redisObject {
 *       unsigned type:4;
 *       unsigned encoding:4;
 *       unsigned lru:LRU_BITS;
 *       int refcount;
 *       void *ptr;
 *   } robj;
 *
 * https://github.com/redis/redis/blob/8f9958dc24fa5992d3d10f6b9caf999e1beee4e5/src/db.c#L168:
 *   robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
 */

// Layout-compatible view of robj without bit-fields. It assumes the three
// bit-fields share one 32-bit word (4 + 4 + LRU_BITS bits, with LRU_BITS
// being 24 - verify against the Redis version in use), which puts `ptr` at
// offset 8. `ptr` is declared as char * (void * in Redis) so that str() can
// read the key text it points to.
struct robj_view {
    unsigned int type_encoding_lru;
    int refcount;
    char *ptr;
};

BEGIN
{
    @t=0;
    @duration=10;
    if ($1 > 0) {
        @duration=$1;
    }
    printf("Tracing /usr/bin/redis-server %d seconds: \n\n", @duration);
}

// Note: lookupKey is called for both get and set; lookupKeyReadOrReply is
// only called for get.
//u:/usr/bin/redis-server:lookupKeyReadOrReply,
u:/usr/bin/redis-server:lookupKey
{
    // arg1: robj *key
    // The key text is reached by following key->ptr. Reading from a fixed
    // offset behind the object header instead only lands near the text when
    // the string happens to be stored inline right after the header, and even
    // then it picks up the string's own header bytes.
    $key = str(((struct robj_view *)arg1)->ptr);
    @keys[$key] = count();
}

// Runs on Ctrl-C and after exit(): print the 10 hottest keys.
END
{
    print(@keys, 10);
    clear(@keys);
}

// Once per second: stop after @duration seconds. The top-10 limit is applied
// here as well so that both exit paths produce the same kind of report.
interval:s:1
{
    @t++;
    if (@t >= @duration) {
        print(@keys, 10);
        clear(@keys);
        clear(@t);
        clear(@duration);
        exit();
    }
}

$ sudo ./redis-uprobe.bt 99999 # 默认运行10秒后自动退出,可以通过第一个位置参数传入跟踪时长,最多输出top10的key;同时在另外一个终端执行/usr/bin/redis-benchmark -q模拟redis请求
Attaching 4 probes...
Tracing /usr/bin/redis-server 99999 seconds:

@keys[counter:__rand_int__]: 100000
@keys[myset]: 200000
@keys[mylist]: 900000
@keys[key:__rand_int__]: 1200000

@duration: 99999
@t: 91

案例四,审计系统shell命令执行记录

通常公司因为某些资质申请或者证书审核的原因,会需要做一些系统安全或者日志审计,需要有服务器命令行执行记录。我们通常使用auditd工具实现,同样的,我们用ebpf的uprobe能力也能简单实现相关的功能。

#!/usr/local/bin/bpftrace

/*
 * Log every command that /bin/bash hands to shell_execve(), with a
 * timestamp, the pid and the user name.
 *
 * Declaration and call site in bash:
 *     int shell_execve PARAMS((char *, char **, char **));
 *     shell_execve (command, args, env);
 */

// Print the column header once, when the script starts.
BEGIN
{
    printf("%-20s %-6s %-10s %s\n", "TIME", "PID", "USERNAME", "COMMAND");
}

// Each row is produced by three consecutive output calls: the timestamp,
// then pid and user name, then the argument vector (arg1) joined into one line.
uprobe:/bin/bash:shell_execve
{
    time("%F-%T ");
    printf("%-6d %-10s ", pid, username);
    join(arg1);     // args (argv)
}

$ sudo ./bash-readline.bt
Attaching 2 probes...
TIME PID USERNAME COMMAND
2021-02-28-19:16:31 65488 obbobbxy bash a.sh
2021-02-28-19:16:31 65489 obbobbxy /bin/echo 123 300c
2021-02-28-19:16:31 65490 obbobbxy /bin/echo abc asdfadf
2021-02-28-19:16:31 65491 obbobbxy whoami
2021-02-28-19:16:31 65492 obbobbxy bpftrace --version
2021-02-28-19:16:34 65495 obbobbxy cat a.sh
2021-02-28-19:17:15 65523 root ps aux
2021-02-28-19:17:15 65524 root grep /usr/bin/osquery
2021-02-28-19:17:15 65525 root grep -v grep
2021-02-28-19:17:15 65526 root wc -l

小结

目前debian9以上系统都是4.x内核,基本利用bpftrace或者bcc-tools能实现许多跟踪点的信息跟踪,辅助我们日常工作,提升问题定位能力。

debian11以后系统默认都启用了BTF,CO-RE不远了。