# Experiment layout. Input files are named "plate<P>-Rep<R>.txt" and are
# resolved relative to the current working directory.
number_of_plates <- 5
number_of_reps <- 3

# raw_dataset[[r]] holds the matrix for replicate r: the tables read from all
# plates are bound side by side (one column per series, one row per data line).
raw_dataset <- vector("list", length = number_of_reps)
for (repl in seq_len(number_of_reps)) {
  rawData <- c()
  for (plate in seq_len(number_of_plates)) {
    fileName <- paste0("plate", plate, "-Rep", repl, ".txt")
    cat(paste0("Opening input connection: `", fileName, "`..."), "\n")
    con <- file(fileName, open = "r")
    # Per-file parser state. `tmp` is reset so that a table left over from the
    # previous file can never be appended again by mistake.
    tmp <- NULL
    is.first <- TRUE   # TRUE until the first data row of this file is read
    pending <- FALSE   # TRUE while `tmp` holds rows not yet bound to rawData
    nline <- 0
    # tryCatch(..., finally = ) guarantees the connection is closed even when
    # parsing or cbind() fails part-way through the file.
    tryCatch(
      {
        while (length(line <- readLines(con, n = 1)) > 0) {
          nline <- nline + 1
          if (!nzchar(line)) {
            # A blank line ends the current table. Flush it only if it has not
            # been flushed already, so repeated or trailing blank lines do not
            # duplicate columns.
            cat(paste0(" blank line:", nline, " skipped"), "\n")
            if (pending) {
              rawData <- cbind(rawData, tmp)
              tmp <- NULL
              pending <- FALSE
            }
          } else if (startsWith(line, "[Table")) {
            # Comment lines beginning with "[Table" carry no data.
            cat(paste0(" comment line: ", nline, " skipped"), "\n")
          } else if (startsWith(line, "Time")) {
            # Header line: drop the leading "Time" column and tag every
            # remaining column name with the plate number.
            myColNames <- strsplit(line, split = ";")[[1]][-1]
            myColNames <- paste0("plate_", plate, "_", myColNames)
            # A header starts a fresh, empty table; rows accumulated since the
            # last flush (if any) are discarded.
            tmp <- array(data = NA, dim = c(0, length(myColNames)))
            colnames(tmp) <- myColNames
            pending <- FALSE
          } else {
            # Data line: drop the time stamp and append the numeric values.
            if (is.null(tmp)) {
              stop(
                "data row found before any \"Time\" header in `", fileName,
                "` (line ", nline, ")",
                call. = FALSE
              )
            }
            tmp <- rbind(
              tmp,
              as.numeric(strsplit(line, split = ";")[[1]][-1])
            )
            is.first <- FALSE
            pending <- TRUE
          }
        }
        # End of file: flush the last table unless a blank line already did.
        if (pending) {
          rawData <- cbind(rawData, tmp)
        }
        if (is.first) {
          warning("no data rows read from `", fileName, "`", call. = FALSE)
        }
      },
      finally = {
        cat(paste0("Closing input connection: `", fileName, "`."), "\n\n")
        close(con)
      }
    )
  }
  raw_dataset[[repl]] <- rawData
}
# The raw_dataset contains all data, with dimensions:
# 3 replicates x 289 time-points x 480 time-series.
# Clean raw_dataset by filtering out blank wells and badly behaved series.
n <- dim(raw_dataset[[1]])[2]
nTime <- dim(raw_dataset[[1]])[1]

# Element-wise mean over the replicates. Reduce() sums the matrices in
# replicate order and also works when there is a single replicate (where a
# `2:number_of_reps` loop would count backwards and index a missing element).
# Dimnames are stripped so the result is a plain numeric matrix.
averagedData <- unname(
  Reduce(`+`, raw_dataset[seq_len(number_of_reps)])
) / number_of_reps

# A blank well is defined as one whose average growth level at the last
# time-point (row nTime; t = 289 for this data set) is below 0.015. This
# threshold excludes all blank wells here and should be adjusted for other
# applications.
discardWells <- which(averagedData[nTime, ] < 0.015)
# Series 36 and 75 are excluded by hand as well because they do not behave
# nicely.
discardWells <- c(discardWells, c(36, 75))

# Final dataset (N = 411 time-series for this data set). drop = FALSE keeps
# each replicate a matrix even if only one column survives the filter.
myDataList <- lapply(
  raw_dataset,
  function(y) y[, -discardWells, drop = FALSE]
)
lapply(myDataList, dim)