Tuesday, September 5, 2017

R code refactoring: load a compressed matrix and retrieve correlations

Load sparse correlation matrix from file

# Original code
# Build the relevance matrix once, inside an immediately-invoked anonymous
# function, and store the result in `relevant_matrix`.
relevant_matrix <- (function() {
  # The matrix size is fixed at 100 x 100 and starts out as all zeros.
  m <- matrix(0, 100, 100)
  # Walk the CSV row by row: the first two fields of each row are used as the
  # row/column index and the third as the value to store. `<<-` is needed so
  # the assignment reaches `m` in the enclosing function instead of creating a
  # local copy inside the callback. The value returned by apply() is discarded.
  # NOTE(review): apply() coerces the data frame to a matrix first, so this
  # presumably expects every column to be numeric - confirm against the file.
  apply(read.csv("amatrixfile.csv"), 1,
        function(x) { m[x[1], x[2]] <<- x[3] })
  # Return the filled matrix.
  m
})()

# Refactored
# Improvements:
# 1. Remove the hard-coded file name
# 2. Derive the matrix dimension from the data
# 3. Set the diagonal to 1
# 4. Make the code reusable
# Load a sparse relevance matrix stored as (row, column, value) triplets.
#
# Args:
#   aFileName: path to a CSV file with a header row. The first column holds
#     the row index, the second the column index and the third the value.
#     Indices are assumed to be positive whole numbers (1-based).
#
# Returns:
#   A square numeric matrix whose size is the largest index found in the
#   file. Cells listed in the file carry their value, every other cell is 0,
#   and the diagonal is forced to 1. A file without data rows yields a
#   0 x 0 matrix.
loadRelavanceMatrix <- function(aFileName) {
  rel <- read_csv(aFileName)
  stopifnot(ncol(rel) >= 3)

  # Nothing to place: max() of an empty set would be -Inf and matrix() would
  # fail with an unhelpful message, so return an empty matrix instead.
  if (nrow(rel) == 0L) {
    return(matrix(0, 0L, 0L))
  }

  # Use the same positional columns for sizing and for filling, so the
  # function does not depend on particular header names.
  srcIdx <- rel[[1]]
  destIdx <- rel[[2]]
  entryDim <- max(srcIdx, destIdx)

  ret <- matrix(0, entryDim, entryDim)
  # Indexing with a two-column matrix assigns every (row, column) pair in one
  # vectorised step; when a pair occurs more than once the last value wins,
  # just as with a row-by-row loop.
  ret[cbind(srcIdx, destIdx)] <- rel[[3]]

  diag(ret) <- 1

  ret
}

Retrieve correlation

# Original code
# For the given discipline ids, collect the ids themselves plus every id that
# has a positive entry in the corresponding row of
# `relevant_disciplines_matrix`, and return one row per id with its highest
# relevance. Returns NULL when `disciplines` is empty.
#
# `relevant_disciplines_matrix` is not a parameter; it is looked up in the
# enclosing environment and must exist before this function is called.
relevant_disciplines <- function(disciplines) {
  if(length(disciplines)==0) { return(NULL) }
  # `relevance ~ .` groups by every other column (here only `id`) and `max`
  # keeps the largest relevance seen for each id.
  aggregate(
    relevance ~ .,
    # Every requested discipline is listed with relevance 1.0 ...
    rbind(data.frame(id=disciplines, relevance=1.0),
      # ... followed by, for each requested discipline, the column positions
      # whose entry in that discipline's matrix row is > 0, paired with those
      # entries. The per-discipline data frames are stacked with rbind.
      Reduce(rbind, lapply(disciplines, function(discipline_id) {
        nonzero <- relevant_disciplines_matrix[discipline_id,] > 0
        data.frame(id=which(nonzero),
                   relevance=relevant_disciplines_matrix[discipline_id, nonzero])
      }))),
    max)
}

# Refactored
relevant_disciplines

No comments:

Post a Comment