# Command-line arguments, in order:
#   [1] name of the metric (numerator) column
#   [2] name of the denominator column, or "1" to screen on the metric alone
#   [3] minimum value the metric (or metric / denominator) must exceed
#   [4] number of months over which the price return is measured
listIn <- commandArgs(TRUE)

library(lubridate)

data <- read.csv("/path/to/assignment/explainer")

# Histogram of forward price returns for rows that pass a metric screen.
#
# Rows of `df` are kept when the metric (or the metric divided by the
# denominator) is strictly greater than the minimum value. Each kept row is
# paired with the price of the same ticker in the calendar month that lies
# `listIn[4]` months later; pairing is done on month-year, not on exact day.
# The percentage return between the two prices is then binned and plotted.
#
# Args:
#   df:     data frame with columns "ticker_exchange", "price", "date"
#           (dates are parsed as day/month/year, "%d/%m/%Y") plus the
#           columns named in listIn[1] and, if used, listIn[2].
#   listIn: the four arguments described at the top of this file.
#   nbins:  number of equal-width bins spanning min..max of the returns; one
#           extra bin is added on each side. Defaults to 100.
#
# Returns:
#   A data frame with one row per bin: `Var1` (interval label) and `Freq`
#   (count). It has zero rows, and a warning is raised, when no price pair
#   could be formed.
#
# Side effects:
#   Draws the histogram on the active graphics device.
myHist <- function(df, listIn, nbins = 100) {
  if (length(listIn) < 4L) {
    stop(
      "listIn needs 4 values: metric, denominator, minimum value, months",
      call. = FALSE
    )
  }
  stopifnot(is.numeric(nbins), length(nbins) == 1L, nbins >= 1)
  nbins <- as.integer(nbins)

  min_value <- as.numeric(listIn[[3]])
  n_months <- as.integer(listIn[[4]])
  if (is.na(min_value) || is.na(n_months)) {
    stop(
      "listIn[3] must be numeric and listIn[4] a whole number of months",
      call. = FALSE
    )
  }

  # Columns may arrive as factor or character; go through character so that
  # factor codes are never mistaken for values.
  to_number <- function(x) as.numeric(as.character(x))
  month_key <- function(d) format(d, "%m-%Y")
  empty_result <- function() {
    warning("no price pairs available; returning an empty table",
            call. = FALSE)
    data.frame(Var1 = character(0), Freq = integer(0))
  }

  base_cols <- c("ticker_exchange", "price", "date")

  # Price lookup table built from ALL rows (before screening), keyed by
  # ticker and month-year.
  df2 <- df[, base_cols, drop = FALSE]
  df2$date <- as.Date(df2$date, format = "%d/%m/%Y")
  # merge() treats NA keys as equal to each other, so unparseable dates
  # must be removed or they would pair up unrelated rows.
  df2 <- df2[!is.na(df2$date), , drop = FALSE]
  df2$date <- month_key(df2$date)

  # ---- Apply the metric constraint ----
  has_denominator <- listIn[[2]] != "1"
  screen_cols <- c(base_cols, listIn[[1]], if (has_denominator) listIn[[2]])
  df <- df[, screen_cols, drop = FALSE]
  df <- df[complete.cases(df), , drop = FALSE]
  score <- to_number(df[[4]])
  if (has_denominator) {
    score <- score / to_number(df[[5]])
  }
  # which() drops rows whose comparison is NA, as subset() would.
  df <- df[which(score > min_value), , drop = FALSE]

  df$date <- as.Date(df$date, format = "%d/%m/%Y")
  df <- df[!is.na(df$date), , drop = FALSE]
  if (nrow(df) == 0L) {
    return(empty_result())
  }

  # ---- Pair each start price with the price n_months later ----
  # %m+% never rolls over into the following month, so the month-year key
  # of the shifted date is exactly the start month plus n_months.
  df$nextDate <- month_key(df$date %m+% months(n_months))
  df <- df[, c("ticker_exchange", "price", "nextDate"), drop = FALSE]

  outframe <- merge(
    df, df2,
    by.x = c("nextDate", "ticker_exchange"),
    by.y = c("date", "ticker_exchange")
  )
  # After the merge, column 3 is the price at the start date (from `df`)
  # and column 4 is the price at the later date (from `df2`).
  colnames(outframe)[3] <- "P2"
  colnames(outframe)[4] <- "P1"
  outframe$P2 <- to_number(outframe$P2)
  outframe$P1 <- to_number(outframe$P1)
  outframe$returns <- ((outframe$P1 / outframe$P2) - 1) * 100

  # Drop NAs, non-finite returns (e.g. a start price of 0) and duplicates.
  usable <- complete.cases(outframe) & is.finite(outframe$returns)
  outframe <- unique(outframe[usable, , drop = FALSE])
  if (nrow(outframe) == 0L) {
    return(empty_result())
  }

  # ---- Bin the returns ----
  lowest <- min(outframe$returns)
  spread <- max(outframe$returns) - lowest
  if (spread == 0) {
    # All returns identical: pick a non-zero span scaled to the value so the
    # breaks stay distinct and seq() does not receive by = 0.
    spread <- max(1, abs(lowest))
  }
  width <- spread / nbins
  # One bin of padding on each side, hence nbins + 2 bins / nbins + 3 breaks.
  bins <- seq(lowest - width, by = width, length.out = nbins + 3L)

  hist(outframe$returns, breaks = bins)
  transform(table(cut(outframe$returns, bins)))
}

myHist(data, listIn)