最近巡檢發現生産環境hadoop的namenode日志一直報錯,具體代碼如下:
2012-08-21 09:20:24,486 WARN org.apache.hadoop.hdfs.server.namenode.FSNamesystem: Cannot roll edit log, edits.new files already exists in all healthy directories:
/data/work/hdfs/name/current/edits.new
/backup/current/edits.new
2012-08-21 09:20:25,357 ERROR org.apache.hadoop.security.UserGroupInformation: PriviledgedActionException as:hadoop cause:java.net.ConnectException: Connection refused
2012-08-21 09:20:25,359 WARN org.mortbay.log: /getimage: java.io.IOException: GetImage failed. java.net.ConnectException: Connection refused
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.PlainSocketImpl.doConnect(PlainSocketImpl.java:333)
at java.net.PlainSocketImpl.connectToAddress(PlainSocketImpl.java:193)
at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:182)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:366)
at java.net.Socket.connect(Socket.java:519)
at java.net.Socket.connect(Socket.java:469)
at sun.net.NetworkClient.doConnect(NetworkClient.java:163)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:394)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:529)
at sun.net.www.http.HttpClient.(HttpClient.java:233)
at sun.net.www.http.HttpClient.New(HttpClient.java:306)
at sun.net.www.http.HttpClient.New(HttpClient.java:323)
at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:852)
at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:793)
at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:718)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1041)
at org.apache.hadoop.hdfs.server.namenode.TransferFsImage.getFileClient(TransferFsImage.java:160)
at org.apache.hadoop.hdfs.server.namenode.GetImageServlet$1$1.run(GetImageServlet.java:88)
at org.apache.hadoop.hdfs.server.namenode.GetImageServlet$1$1.run(GetImageServlet.java:85)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.hdfs.server.namenode.GetImageServlet$1.run(GetImageServlet.java:85)
at org.apache.hadoop.hdfs.server.namenode.GetImageServlet$1.run(GetImageServlet.java:70)
at org.apache.hadoop.hdfs.server.namenode.GetImageServlet.doGet(GetImageServlet.java:70)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:820)
at org.mortbay.jetty.servlet.ServletHolder.handle(ServletHolder.java:511)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1221)
at org.apache.hadoop.http.HttpServer$QuotingInputFilter.doFilter(HttpServer.java:835)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1212)
at org.mortbay.jetty.servlet.ServletHandler.handle(ServletHandler.java:399)
at org.mortbay.jetty.security.SecurityHandler.handle(SecurityHandler.java:216)
at org.mortbay.jetty.servlet.SessionHandler.handle(SessionHandler.java:182)
at org.mortbay.jetty.handler.ContextHandler.handle(ContextHandler.java:766)
at org.mortbay.jetty.webapp.WebAppContext.handle(WebAppContext.java:450)
at org.mortbay.jetty.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:230)
at org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
at org.mortbay.jetty.Server.handle(Server.java:326)
at org.mortbay.jetty.HttpConnection.handleRequest(HttpConnection.java:542)
at org.mortbay.jetty.HttpConnection$RequestHandler.headerComplete(HttpConnection.java:928)
at org.mortbay.jetty.HttpParser.parseNext(HttpParser.java:549)
at org.mortbay.jetty.HttpParser.parseAvailable(HttpParser.java:212)
at org.mortbay.jetty.HttpConnection.handle(HttpConnection.java:404)
at org.mortbay.io.nio.SelectChannelEndPoint.run(SelectChannelEndPoint.java:410)
at org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:582)
網上谷歌了一番,提示了secondarynamenode和namenode擷取image出了問題,但是網上的文章說是secondarynamenode根本就無法擷取image,但是我的生産環境secondarynamenode是可以從namenode擷取日志和映像的,因為我secondarynamenode的hdfs-site.xml檔案中配置了namenode的主機名,是以能擷取,反而就是日志中一直報錯,但是不影響hdfs的使用。按照網上的辦法的解決步驟如下:
1、停止hbase、zookeeper、hadoop
2、修改namenode的hdfs-site.xml檔案中的内容:
dfs.secondary.http.address
0.0.0.0:50090
The secondary namenode http server address and port.
If the port is 0 then the server will start on a free port.
将上述的0.0.0.0修改為你部署secondarynamenode的主機名就OK了!
3、同步到所有的節點中,記得部署secondarynamenode節點的hdfs-site.xml檔案不要完全覆寫了,因為配置不一樣。
4、啟動hadoop、zookeeper、hbase。
再檢視hadoop的namenode日志相關的報錯就消失了!