天天看點

基于RHadoop的Logistic regression算法

library(rmr2)

## @knitr logistic.regression-signature
# Fit a logistic-regression weight vector ("plane") by repeated full-batch
# gradient steps, each computed with one rmr2 mapreduce job.
#
# Arguments:
#   input      rmr2 input (anything mapreduce() accepts) whose values reach
#              the mapper as a matrix: column 1 is the label, the remaining
#              columns are the features.
#   iterations number of gradient steps; 0 returns the initial all-zero plane.
#   dims       number of feature columns (length of the returned plane).
#   alpha      step size applied to each summed gradient.
#
# Returns the final plane as a 1 x dims matrix.
#
# NOTE(review): the per-row term Y * X * g(-Y * Xw) looks like the
# log-likelihood gradient for labels coded -1/+1 -- confirm that callers
# encode labels that way.
logistic.regression <-
  function(input, iterations, dims, alpha) {

## @knitr logistic.regression-map
    # Mapper: emit every row's gradient contribution under one constant key
    # so that all contributions are summed together.
    lr.map <-
      function(., M) {
        Y <- M[, 1]
        # drop = FALSE keeps X a matrix for one-row chunks and for dims == 1,
        # so the reducer always receives something with columns to sum.
        X <- M[, -1, drop = FALSE]
        keyval(
          1,
          Y * X *
            g(-Y * as.numeric(X %*% t(plane))))
      }
## @knitr logistic.regression-reduce
    # Reducer (also used as combiner): column-wise sum, kept as a single row.
    lr.reduce <-
      function(k, Z) {
        keyval(k, t(as.matrix(colSums(Z))))
      }
## @knitr logistic.regression-main
    plane <- t(rep(0, dims))
    g <- function(z) 1 / (1 + exp(-z))
    # seq_len() instead of 1:iterations, which would loop twice for 0.
    for (i in seq_len(iterations)) {
      gradient <-
        values(
          from.dfs(
            mapreduce(
              input,
              map = lr.map,
              reduce = lr.reduce,
              combine = TRUE)))
      plane <- plane + alpha * gradient
    }
    plane
  }
## @knitr end

# Demo: run the fit on the `foodstamp` data set from robustbase.
library(robustbase)
# The data set is named `foodstamp` (the name used below); the previous
# `footstamp` spelling did not match it.
data(foodstamp)

# Hand the data to rmr2 as a matrix so the mapper can slice columns.
testdata <- to.dfs(as.matrix(foodstamp))

# Arguments: 10 iterations, dims = 3, step size 0.05.
# dims presumably equals the number of non-label columns in foodstamp --
# TODO confirm against the data set.
# NOTE(review): logistic.regression's update looks like the -1/+1 label form;
# check how the first column of foodstamp is coded before trusting the fit.
print(logistic.regression(testdata, 10, 3, 0.05))
           

繼續閱讀