Erlang格式化日志

By AverageJoeWang
 标签:

一.需求描述

格式化服务器日志,提取出每个日志的ip,id,流量,处理日志容量30G,要求在1小时内完成

二.完成情况

分别使用erlang与c语言处理文件,c语言处理1.18G日志需要23s(虚拟机下,1核,2G内存),erlang单进程用时9min20s

三.erlang代码

3.1.read_line_high1.erl

-module(read_line_high1).
-export([file_read_write/2, loop_read_write/2, start/0,decode_line_hls/1]).

%% @doc Parse one HLS access-log line.
%%
%% On success returns `{ok, {hls, Ip, LiveId, StartSec, DownFlow}}' where
%% `Ip' is the leading token of the line (a string), `LiveId' is the numeric
%% id that follows one of the stream-name prefixes, `StartSec' is the
%% HH:MM:SS part of the timestamp converted to seconds since midnight, and
%% `DownFlow' is the integer that follows the 2xx status token.
%% Returns `fail' when the line does not match, or when it matches but the
%% byte-count field is empty.
decode_line_hls(String) ->
    %% Capture groups: 1 = IP, 2..4 = HH, MM, SS, 5 = stream-name prefix
    %% (deliberately not captured below), 6 = live id, 7 = byte count.
    %% The single `.' right after the id matches ANY character, so both
    %% ".../w12345.m3u8" and ".../w12345/playlist.m3u8" style paths match.
    RE = "^(.*?\\b)\\s.*?/\\d*:(\\d{2}):(\\d{2}):(\\d{2}\\b)\\s.*\\].*/(wtest_|test_|w|ba|bp)(\\d{5,})..*HTTP.*\\s2\\d{2}\\s(\\d*)\\s.*$",
    case re:run(String, RE, [{capture, [1, 2, 3, 4, 6, 7], list}]) of
        {match, [_Ip, _TimeH, _TimeM, _TimeS, _LiveId, ""]} ->
            %% Group 7 is `\d*' and may match nothing (e.g. two spaces after
            %% the status code); list_to_integer("") would raise badarg.
            fail;
        {match, [Ip, TimeH, TimeM, TimeS, LiveId, DownFlow]} ->
            StartSec = calendar:time_to_seconds(
                         {list_to_integer(TimeH),
                          list_to_integer(TimeM),
                          list_to_integer(TimeS)}),
            {ok, {hls, Ip, list_to_integer(LiveId), StartSec, list_to_integer(DownFlow)}};
        nomatch ->
            fail
    end.


%% @doc Open `Filename1' for reading and `Filename2' for writing, then feed
%% both handles to `loop_read_write/2'.
%%
%% Either open failing crashes the caller with a `badmatch' (assertive
%% match). The return value is whatever `loop_read_write/2' returns.
file_read_write(Filename1, Filename2) ->
    %% file:open/2 is specified to take a list of modes; the bare atoms
    %% `read' / `write' only work through a legacy fallback.
    {ok, In} = file:open(Filename1, [read]),
    {ok, Out} = file:open(Filename2, [write]),
    loop_read_write(In, Out).




%% Read/decode/write loop.
%% The second argument is the result of the previous read: `eof' ends the
%% loop and closes both devices, the empty string is the kick-off value that
%% triggers the first read, and anything else is treated as a line to decode.
loop_read_write(In, eof, Out) ->
    file:close(In),
    file:close(Out);
loop_read_write(In, "", Out) ->
    loop_read_write(In, io:get_line(In, ''), Out);
loop_read_write(In, Line, Out) ->
    emit_decoded(Out, decode_line_hls(Line)),
    loop_read_write(In, io:get_line(In, ''), Out).

%% Write a successfully decoded record as one `~p' term per line; lines that
%% failed to decode produce no output.
emit_decoded(Out, {ok, Record}) ->
    io:format(Out, "~p~n", [Record]);
emit_decoded(_Out, fail) ->
    ok.

%% Entry point of the loop: start the three-argument loop with an empty
%% "previous line" so that its first step is a read from `S'.
loop_read_write(S, Ss) ->
    NothingReadYet = [],
    loop_read_write(S, NothingReadYet, Ss).
%% End of the read/write loop.





%% Convenience entry point with fixed file names: "simple.log" is the raw
%% input log and "simple.format.log" receives the formatted output.
start() ->
    RawLog = "simple.log",
    FormattedLog = "simple.format.log",
    file_read_write(RawLog, FormattedLog).

3.2.编译运行

#编译
erlc read_line_high1.erl
#运行
erl -noshell -s read_line_high1 start -s init stop
#测试运行时间
time erl -noshell -s read_line_high1 start -s init stop

四.C语言

4.1.format_log_c.c

#include <stdio.h>
#include <string.h>
#define MAX_LINE 1024
#define N 256

/*
 * Reformat the access log "hard.log" into "hard.format.txt", writing one
 * "ip,id,start,end" record per input line that can be parsed.
 *
 * Lines that do not yield all fields are skipped instead of re-emitting
 * values left over from the previous line.
 *
 * Returns 0 on success, -1 if either file cannot be opened.
 */
int main()
{
    FILE *fr;               /* log file to process */
    FILE *fw;               /* formatted output file */
    char buf[MAX_LINE];

    char ip[N] = "";        /* ip */
    char id[N] = "";        /* id */
    char start[N] = "";     /* "flow start": first token after the protocol field (200 in the sample) */
    char end[N] = "";       /* "flow end": the token after that (478 in the sample) */
    char http[N] = "";      /* request URL */

    if ((fr = fopen("hard.log", "r")) == NULL)
    {
        printf("open hard.log file failed!\n");
        return -1;
    }
    if ((fw = fopen("hard.format.txt", "w")) == NULL)
    {
        printf("open hard.format.txt file failed!\n");
        fclose(fr);         /* do not leak the input handle on this path */
        return -1;
    }

    /* Sample line:
     * 119.135.123.143 - - [06/Jul/2016:00:07:13 +0800] "GET http://hls-w.quklive.com/live/w1467637989950943/playlist.m3u8 HTTP/1.1" 200 478 "-" "Samsung SM-G5308W stagefright/1.2 (Linux;Android 4.4.4)"
     */
    while (fgets(buf, MAX_LINE, fr) != NULL)
    {
        /* Strip the newline only if there is one; the last line of the file
         * or an over-long line has none and must not lose a character. */
        buf[strcspn(buf, "\n")] = '\0';

        /* Pass 1: ip, URL and the two numeric fields. Every width is N - 1
         * so a long token cannot overrun its N-byte buffer. */
        if (sscanf(buf, "%255s - - %*s%*s%*s%255s%*s%255s%255s",
                   ip, http, start, end) != 4)
        {
            continue;
        }

        /* Pass 2: skip the first 31 characters of the URL (in the sample
         * line that is everything up to and including the 'w' prefix) and
         * take the id up to the next '/'. */
        if (sscanf(http, "%*31s%255[^/]", id) != 1)
        {
            continue;
        }

        fprintf(fw, "%s,%s,%s,%s\n", ip, id, start, end);
    }

    fclose(fr);
    fclose(fw);
    return 0;
}

4.2.编译运行

gcc format_log_c.c -o format_log
./format_log
#测试时间
time ./format_log