Python 實時分析日誌的一個小指令碼分享

NO IMAGE

前言

大家都知道Web運維總要關注相關域名的實時2xx/s、4xx/s、5xx/s、響應時間、頻寬等這些指標,之前的日誌是五分鐘一分割,簡單的用awk就可以了,現在由於要推送日誌到ELK,繼續之前五分鐘一分割會有問題,就改為一天分割一次。改成一天一分割後,顯然再繼續用Shell就不合適了,於是就用Python寫了下。

方法如下:

指令碼主要運用了檔案的seek和tell函式,原理如下:

       1.加入crontab,每5分鐘執行一次

       2.只分析從上次讀取日誌檔案的結束位置到這次讀取檔案時的末尾位置之間的日誌,並輸出結果。

可以使用zabbix_sender把結果傳送到zabbix server,或者直接使用zabbix agent來讀取這個檔案取資料,配合zabbix出圖、做報警。程式碼如下:


#!/usr/bin/env python
#coding: utf-8
from __future__ import division
import os
# Access log that is analysed incrementally on every run.
LOG_FILE = '/data0/logs/nginx/xxxx-access_log'
# Remembers the byte range handled by the previous run (two "name: value" lines).
POSITION_FILE = '/tmp/position.log'
# Output file holding the computed metrics, one "name: value" pair per line.
STATUS_FILE = '/tmp/http_status'
# Interval between cron runs, in seconds; counters are divided by it to get per-second rates.
CRON_TIME = 300
def _write_position(start_position, end_position):
    """Persist the byte range handled by this run to POSITION_FILE."""
    with open(POSITION_FILE, 'w') as fh:
        fh.write('start_position: %s\n' % start_position)
        fh.write('end_position: %s\n' % end_position)


def _read_last_end_position():
    """Return the end offset saved by the previous run, or None if unusable.

    The file is considered unusable when it does not consist of exactly two
    ``name: <integer>`` lines.
    """
    with open(POSITION_FILE) as fh:
        lines = fh.readlines()
    if len(lines) != 2:
        return None
    try:
        # int() tolerates the surrounding blank and the trailing newline.
        _, last_end = [int(line.split(':')[1]) for line in lines]
    except (IndexError, ValueError):
        return None
    return last_end


def get_position():
    """Return ``[start, end]`` byte offsets of the log slice to analyse.

    The slice runs from where the previous run stopped to the current end of
    LOG_FILE. The new range is saved to POSITION_FILE before returning.

    The process is terminated with ``os._exit(1)`` (no return) when:

    * POSITION_FILE does not exist yet: the current log size is recorded so
      that the next run has a starting point;
    * POSITION_FILE is malformed: it is deleted so the next run starts over;
    * the log has not grown since the previous run.

    Returns:
        A two-item list of ints ``[start_position, end_position]``.
    """
    # First run: nothing to compare against, just remember where the log ends.
    if not os.path.exists(POSITION_FILE):
        _write_position(0, os.path.getsize(LOG_FILE))
        os._exit(1)

    start_position = _read_last_end_position()
    if start_position is None:
        os.remove(POSITION_FILE)
        os._exit(1)

    end_position = os.path.getsize(LOG_FILE)

    # Offsets are compared as integers: comparing them as strings is
    # lexicographic ('99772400' > '100227572') and falsely reports a rotation
    # whenever the number of digits changes.
    if start_position > end_position:
        # The log was rotated (it is now shorter): restart from the top.
        start_position = 0
    elif start_position == end_position:
        # No new log lines since the previous run.
        os._exit(1)

    _write_position(start_position, end_position)
    return [start_position, end_position]
def write_status(content):
    """Overwrite STATUS_FILE with ``content``.

    Args:
        content: Text to store; any previous file content is discarded.
    """
    # The context manager closes the handle even if the write fails.
    with open(STATUS_FILE, 'w') as fh:
        fh.write(content)
def handle_log(start_position, end_position):
    """Aggregate the log lines between two byte offsets and write the metrics.

    Reads LOG_FILE from ``start_position`` up to ``end_position`` and passes
    the result to ``write_status`` as ``name: value`` lines: per-second rates
    of selected status codes, of all requests and of bytes sent (each total
    divided by CRON_TIME), plus the mean request time per request.

    Args:
        start_position: Byte offset at which to start reading.
        end_position: Byte offset at which to stop reading.

    Each line is split on single spaces; fields 3, 10 and 11 are taken as
    request time, status code and bytes sent.
    NOTE(review): this assumes the matching nginx ``log_format`` -- confirm
    against the server configuration.
    """
    # Only 200 and 206 are counted in the "2xx" bucket.
    bucket_of = {
        '200': 'status_2xx',
        '206': 'status_2xx',
        '403': 'status_403',
        '404': 'status_404',
        '500': 'status_500',
        '502': 'status_502',
        '503': 'status_503',
        '504': 'status_504',
    }
    counts = dict.fromkeys(bucket_of.values(), 0)
    status_all = 0
    rt = 0.0
    bandwidth = 0

    with open(LOG_FILE) as log:
        log.seek(start_position, 0)
        while log.tell() < end_position:
            line = log.readline()
            if not line:
                # The log ended before end_position (e.g. it was truncated).
                break
            fields = line.split(' ')
            try:
                request_time = fields[3]
                status = fields[10]
                bytes_sent = fields[11]
            except IndexError:
                # Line too short to be a regular access-log entry: skip it.
                continue

            status_all += 1
            try:
                rt += float(request_time.strip('s'))
                bandwidth += int(bytes_sent)
            except ValueError:
                # Non-numeric time/size: the request still counts by status.
                pass

            bucket = bucket_of.get(status)
            if bucket is not None:
                counts[bucket] += 1

    # Float division keeps the rates fractional regardless of the division
    # mode in effect for this module.
    interval = float(CRON_TIME)
    # Avoid dividing by zero when the slice held no usable lines.
    mean_rt = rt / status_all if status_all else 0.0

    write_status(
        "status_2xx: %s\nstatus_403: %s\nstatus_404: %s\nstatus_500: %s\n"
        "status_502: %s\nstatus_503: %s\nstatus_504: %s\nstatus_all: %s\n"
        "rt: %s\nbandwidth: %s\n" % (
            counts['status_2xx'] / interval,
            counts['status_403'] / interval,
            counts['status_404'] / interval,
            counts['status_500'] / interval,
            counts['status_502'] / interval,
            counts['status_503'] / interval,
            counts['status_504'] / interval,
            status_all / interval,
            mean_rt,
            bandwidth / interval,
        )
    )
if __name__ == '__main__':
    # Work out which slice of the log is new, then analyse exactly that slice.
    handle_log(*get_position())

看下分析的結果:


cat /tmp/http_status
status_2xx: 17.3333333333
status_403: 0.0
status_404: 1.0
status_500: 0.0
status_502: 0.0
status_503: 0.0
status_504: 0.0
status_all: 20.0
rt: 0.0782833333333
bandwidth: 204032.0

後來發現有點問題:start_position、end_position 都是字串,而字串是按字典序逐字元比較的,不是按數值大小比較,當兩個位置的位數不同時就會得到錯誤的結果,如下:


In [5]: '99772400' > '100227572'
Out[5]: True
In [6]: int('99772400') > int('100227572')
Out[6]: False

因此,更正為:


# Convert once so both checks compare numbers rather than strings.
start_offset, end_offset = int(start_position), int(end_position)
if start_offset > end_offset:
    # Log rotation made the file shorter than the saved offset.
    start_position = 0
elif start_offset == end_offset:
    # The log has stopped growing.
    os._exit(1)

總結

以上就是這篇文章的全部內容了,希望本文的內容對大家的學習或者工作能帶來一定的幫助,如果有疑問大家可以留言交流,謝謝大家對指令碼之家的支援。

您可能感興趣的文章:

詳解Python中的日誌模組logging
python動態監控日誌內容的示例
詳解python之配置日誌的幾種方式
python 日誌增量抓取實現方法