To extend the users supported by Tableau servers (e.g. default 10 seat license)
Time sequence diagram
Tuesday, September 5, 2017
R code refactoring: load a compressed matrix and retrieve correlations
Load sparse correlation matrix from file
# Original code
# Builds a 100 x 100 numeric matrix of zeros and fills it from the rows of
# "amatrixfile.csv": for every CSV row, the first value is used as the row
# index, the second as the column index and the third as the value stored in
# that cell. The anonymous function is invoked immediately, so
# `relevant_matrix` holds the filled matrix, not a function.
relevant_matrix <- (function() {
# Fixed-size container; cells not listed in the file stay 0.
m <- matrix(0, 100, 100)
# apply(..., 1, f) calls f once per CSV row. `<<-` is required so that the
# assignment reaches `m` in the enclosing function instead of creating a
# local copy inside the callback. The value returned by apply() is discarded.
apply(read.csv("amatrixfile.csv"), 1,
function(x) { m[x[1], x[2]] <<- x[3] })
# The filled matrix is the result of the enclosing function.
m
})()
# refactored
# Improvements:
# 1. Remove the hard-coded file name
# 2. Adapt the matrix dimension to the data
# 3. Set the diagonal to 1
# 4. Make the code reusable
#' Load a sparse relevance matrix from a CSV file of (row, column, value)
#' triplets.
#'
#' @param aFileName Path to a CSV file whose first three columns hold the
#'   source index, the destination index and the relevance value.
#' @return A square numeric matrix whose size is the largest index found in
#'   the file. Cells not listed in the file are 0 and the diagonal is 1. A
#'   file without data rows yields a 0 x 0 matrix.
loadRelavanceMatrix <- function(aFileName) {
  rel <- read_csv(aFileName)
  src <- rel[[1]]
  dest <- rel[[2]]

  # max() of an empty vector is -Inf, which matrix() rejects.
  if (length(src) == 0) {
    return(matrix(0, 0, 0))
  }

  # Size the matrix from the same columns that are used as indices below, so
  # the dimension and the subscripts can never disagree.
  entryDim <- max(src, dest)
  ret <- matrix(0, entryDim, entryDim)

  # A two-column index matrix addresses one cell per row, which fills the
  # matrix in a single vectorized assignment (no per-row callback, no `<<-`).
  ret[cbind(src, dest)] <- rel[[3]]
  # Equivalent purrr::pmap formulation, kept for reference:
  # pmap(list(aRow = src, aCol = dest, aRel = rel[[3]]),
  #      function(aRow, aCol, aRel) {
  #        ret[aRow, aCol] <<- aRel
  #      })

  # Every entry is fully relevant to itself.
  diag(ret) <- 1
  ret
}
Retrieve correlation
# Original code
# For a vector of discipline ids, returns a data frame with columns `id` and
# `relevance`. It contains each requested id with relevance 1.0, plus every
# column index whose entry in that id's row of `relevant_disciplines_matrix`
# is greater than 0, with that entry as its relevance. When an id appears
# more than once, only its maximum relevance is kept. Returns NULL when
# `disciplines` is empty.
# NOTE(review): `relevant_disciplines_matrix` is not defined in this snippet;
# it is read from the enclosing scope - presumably the loaded relevance
# matrix; confirm.
relevant_disciplines <- function(disciplines) {
# Nothing requested: no data frame to build.
if(length(disciplines)==0) { return(NULL) }
aggregate(
# `relevance ~ .` groups by every other column, which here is only `id`.
relevance ~ .,
# Seed rows: each requested discipline is related to itself with 1.0.
rbind(data.frame(id=disciplines, relevance=1.0),
# One data frame per requested discipline, stacked row-wise by Reduce(rbind).
Reduce(rbind, lapply(disciplines, function(discipline_id) {
# Logical mask over the columns of this discipline's matrix row.
nonzero <- relevant_disciplines_matrix[discipline_id,] > 0
# which() turns the mask into column indices, which serve as related ids.
data.frame(id=which(nonzero),
relevance=relevant_disciplines_matrix[discipline_id, nonzero])
}))),
# Collapse duplicate ids by keeping the largest relevance.
max)
}
# Refactored
relevant_disciplines
# Original code
# Builds a 100 x 100 numeric matrix of zeros and fills it from the rows of
# "amatrixfile.csv": for every CSV row, the first value is used as the row
# index, the second as the column index and the third as the value stored in
# that cell. The anonymous function is invoked immediately, so
# `relevant_matrix` holds the filled matrix, not a function.
relevant_matrix <- (function() {
# Fixed-size container; cells not listed in the file stay 0.
m <- matrix(0, 100, 100)
# apply(..., 1, f) calls f once per CSV row. `<<-` is required so that the
# assignment reaches `m` in the enclosing function instead of creating a
# local copy inside the callback. The value returned by apply() is discarded.
apply(read.csv("amatrixfile.csv"), 1,
function(x) { m[x[1], x[2]] <<- x[3] })
# The filled matrix is the result of the enclosing function.
m
})()
# refactored
# Improvements:
# 1. Remove the hard-coded file name
# 2. Adapt the matrix dimension to the data
# 3. Set the diagonal to 1
# 4. Make the code reusable
#' Load a sparse relevance matrix from a CSV file of (row, column, value)
#' triplets.
#'
#' @param aFileName Path to a CSV file whose first three columns hold the
#'   source index, the destination index and the relevance value.
#' @return A square numeric matrix whose size is the largest index found in
#'   the file. Cells not listed in the file are 0 and the diagonal is 1. A
#'   file without data rows yields a 0 x 0 matrix.
loadRelavanceMatrix <- function(aFileName) {
  rel <- read_csv(aFileName)
  src <- rel[[1]]
  dest <- rel[[2]]

  # max() of an empty vector is -Inf, which matrix() rejects.
  if (length(src) == 0) {
    return(matrix(0, 0, 0))
  }

  # Size the matrix from the same columns that are used as indices below, so
  # the dimension and the subscripts can never disagree.
  entryDim <- max(src, dest)
  ret <- matrix(0, entryDim, entryDim)

  # A two-column index matrix addresses one cell per row, which fills the
  # matrix in a single vectorized assignment (no per-row callback, no `<<-`).
  ret[cbind(src, dest)] <- rel[[3]]
  # Equivalent purrr::pmap formulation, kept for reference:
  # pmap(list(aRow = src, aCol = dest, aRel = rel[[3]]),
  #      function(aRow, aCol, aRel) {
  #        ret[aRow, aCol] <<- aRel
  #      })

  # Improvement 3 of the refactoring: the diagonal is set to 1, so every
  # entry is fully relevant to itself.
  diag(ret) <- 1
  ret
}
Retrieve correlation
# Original code
# For a vector of discipline ids, returns a data frame with columns `id` and
# `relevance`. It contains each requested id with relevance 1.0, plus every
# column index whose entry in that id's row of `relevant_disciplines_matrix`
# is greater than 0, with that entry as its relevance. When an id appears
# more than once, only its maximum relevance is kept. Returns NULL when
# `disciplines` is empty.
# NOTE(review): `relevant_disciplines_matrix` is not defined in this snippet;
# it is read from the enclosing scope - presumably the loaded relevance
# matrix; confirm.
relevant_disciplines <- function(disciplines) {
# Nothing requested: no data frame to build.
if(length(disciplines)==0) { return(NULL) }
aggregate(
# `relevance ~ .` groups by every other column, which here is only `id`.
relevance ~ .,
# Seed rows: each requested discipline is related to itself with 1.0.
rbind(data.frame(id=disciplines, relevance=1.0),
# One data frame per requested discipline, stacked row-wise by Reduce(rbind).
Reduce(rbind, lapply(disciplines, function(discipline_id) {
# Logical mask over the columns of this discipline's matrix row.
nonzero <- relevant_disciplines_matrix[discipline_id,] > 0
# which() turns the mask into column indices, which serve as related ids.
data.frame(id=which(nonzero),
relevance=relevant_disciplines_matrix[discipline_id, nonzero])
}))),
# Collapse duplicate ids by keeping the largest relevance.
max)
}
# Refactored
relevant_disciplines
Subscribe to:
Posts (Atom)