天天看點

logstash 中正則grok

調試正則的工具: http://grokdebug.herokuapp.com/

注意:透過 add_field、add_tag 添加的字段或 tag,以及 grok 解析時擷取的字段名稱,一定不能使用關鍵字(保留字段名),如 type

解析例子:

input {
    # Beats listener for the nginx shippers. Every event received on this
    # port is stamped with myid=nginx, which the filter and output sections
    # use to select the nginx branch.
    beats {
        port      => 5043
        add_field => { "myid" => "nginx" }
    }

    # Beats listener for the java shippers. Events are stamped with
    # myid=java.
    beats {
        port      => 5044
        add_field => { "myid" => "java" }
    }
}


filter {
    # ---------------------------------------------------------------
    # nginx access-log events (tagged myid=nginx by the beats input)
    # ---------------------------------------------------------------
    if [myid] == "nginx" {
        # Split the access-log line into named fields (domain/site, user,
        # timestamp, verb, request_path, biz, response, bytes,
        # request_duration, referrer, agent, clientip, upstream, ...).
        grok {
            match => {
                "message" => "^(?<domain>%{IP:ip}|(?:%{NOTSPACE:subsite}\.)?(?<site>[-a-zA-Z0-9]+?).com|%{NOTSPACE:unknown}) %{IPORHOST:dayuip} - (?<user>[a-zA-Z\.\@\-\+_%]+) \[%{HTTPDATE:timestamp}\] \"%{WORD:verb} (?<request_path>(?<biz>\/[^/?]*)%{URIPATH:}?)(?:%{URIPARAM:request_param})? HTTP/%{NUMBER:httpversion}\" %{NUMBER:response} (?:%{NUMBER:bytes}|-) (?:%{BASE10NUM:request_duration}|-) (?:\"(?:%{URI:referrer}|-)\"|%{QS:referrer}) %{QS:agent} \"(?:%{IPORHOST:clientip}(?:[^\"]*)|-)\" %{QS:uidgot} %{QS:uidset} \"(?:[^\" ]* )*(?<upstream>[^ \"]*|-)\"$"
            }
        }

        # Parse the captured "timestamp" field with the access-log date layout.
        date {
            locale   => "en"
            timezone => "Asia/Shanghai"
            match    => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
        }

        # grok captures are strings; convert the numeric ones.
        mutate {
            convert => { "bytes" => "integer" "request_duration" => "float"}
        }
    }

    # ---------------------------------------------------------------
    # Application log events (tagged myid=java by the beats input)
    # ---------------------------------------------------------------
    if [myid] == "java" {
        # Files named "<something>-phplog.log" use the extended entry layout
        # that carries proj/iid/file/type inside the message itself.
        # The dot before "log" is escaped so it only matches a literal ".",
        # consistent with the "\.log" used in the source-path pattern below.
        if [source] =~ /.+-phplog\.log/ {
            grok {
                match => {
                    "message"  => "\[entry\]\[ts\](?<ts>.*)\[/ts\]\[lv\](?<lv>.*)\[/lv\]\[th\](?<th>.*)\[/th\]\[lg\](?<lg>.*)\[/lg\]\[cl\](?<cl>.*)\[/cl\]\[m\](?<m>.*)\[/m\]\[ln\](?<ln>.*)\[/ln\]\[bsid\](?<bsid>.*)\[/bsid\]\[esid\](?<esid>.*)\[/esid\]\[txt\](?<txt>.*)\[/txt\]\[proj\](?<proj>.*)\[/proj\]\[iid\](?<iid>.*)\[/iid\]\[file\](?<file>.*)\[/file\]\[ex\](?<ex>.*)\[/ex\]\[type\](?<logtype>.*)\[/type\]\[/entry\]"
                }
            }

            # Drop fields that are not needed.
            mutate {
                remove_field => ["type","logtype"]
            }
        } else {
            # Standard entry layout; the [cmid] section is optional.
            grok {
                match => {
                    "message" => "\[entry\]\[ts\](?<ts>.*)\[/ts\]\[lv\](?<lv>.*)\[/lv\]\[th\](?<th>.*)\[/th\]\[lg\](?<lg>.*)\[/lg\]\[cl\](?<cl>.*)\[/cl\]\[m\](?<m>.*)\[/m\]\[ln\](?<ln>.*)\[/ln\]\[bsid\](?<bsid>.*)\[/bsid\]\[esid\](?<esid>.*)\[/esid\](\[cmid\](?<cmid>.*)\[/cmid\])?\[txt\](?<txt>.*)\[/txt\]\[ex\](?<ex>.*)\[/ex\]\[/entry\]"
                }
            }

            # This layout has no proj/iid in the message, so derive them
            # from the log file name ("<proj>-<iid>-<suffix>.log").
            grok {
                match => {
                    "source" => "(?<proj>[^/]+)-(?<iid>\w+)-\w+\.log"
                }
            }
        }

        # Rename the beats bookkeeping fields.
        # NOTE(review): in the phplog branch "file" was already captured from
        # the message; confirm whether replacing it with "source" is intended.
        mutate {
            rename => {
                "source" => "file"
                "offset" => "seq"
            }
        }

        # Drop fields that are not needed. "offset" and "source" were renamed
        # by the previous mutate, so listing them here has no further effect.
        mutate {
            remove_field => ["input_type","count","tags","message","@version","beat","fields","offset","source"]
        }

        # Parse the "ts" field captured from the entry, with or without a
        # trailing zone offset.
        # NOTE(review): the "$" between date and time is matched literally;
        # confirm this is the separator the application writes.
        date {
            match => ["ts",'yyyy-MM-dd$HH:mm:ss.SSS','yyyy-MM-dd$HH:mm:ss.SSSZ']
        }
    } # endif_javalog
}



output {
    # nginx access logs: index into Elasticsearch and also POST each event
    # as JSON to the aggregation endpoint.
    if [myid] == "nginx" {
        elasticsearch {
            hosts => ["192.168.5.201:9200"]
            index => "log-nginx-%{+YYYY.MM.dd}"
        }

        http {
            format      => "json"
            http_method => "post"
            # Alternative endpoint, currently disabled:
            # url => "http://192.168.1.68:8990/api/v1/metrics"
            url         => "http://agg.we.com/api/v1/acclog"
        }
    }

    # Application logs: choose the index by the originating host.
    if [myid] == "java" {
        if [host] == "zy-java1" {
            # Events from zy-java1 go to the "uat" index.
            elasticsearch {
                hosts => ["192.168.5.201:9200"]
                index => "log-java-call-uat-%{+YYYY.MM.dd}"
            }
        } else if [host] in ["JAVA1", "JAVA2"] {
            # JAVA1 and JAVA2 write to the same index, so a single output
            # instance serves both instead of two identical definitions.
            elasticsearch {
                hosts => ["192.168.5.201:9200"]
                index => "log-java-call-%{+YYYY.MM.dd}"
            }
        }
        # Events from any other host are not written anywhere.
    }
}