# Report on recipe authorship: reads coreresults.tsv, normalises blank cells
# to NA, and writes author statistics to results.log.

# Options
options(stringsAsFactors = FALSE)

###############################################################################
# Initialize data

# Assumes the file coreresults.tsv is in the same directory as this script
# (the path is relative, so the script must be run from that directory).
# The data is loaded before output is diverted so that a missing or unreadable
# input file does not leave console output redirected to the log.
data <- read.csv(
  file = "coreresults.tsv",
  sep = "\t",
  quote = "",
  na.strings = c("NA", "#N/A")
)

# Save results to the following file:
sink("results.log")

cat("Number of recipes in dataset:", nrow(data), "\n")

###############################################################################
# Data cleaning

# Convert blanks or #N/A to NA. `%in%` never yields NA, so cells that are
# already NA are left untouched and each column keeps its type.
for (col in colnames(data)) {
  is_blank <- data[[col]] %in% c("", "#N/A")
  data[[col]][is_blank] <- NA
}

###############################################################################
# Authors

data$author <- factor(data$author)

# Recipe counts per author, sorted ascending (most prolific author last).
ta <- sort(table(data$author))
ta2 <- as.data.frame(ta)

cat("\n\n\n#######AUTHORS#######\n")
cat("The number of unique authors: ")
cat(length(ta))

cat("\nSummary stats about the number of recipes each author shared:\n")
# Explicit print(): a bare expression is only auto-printed at top level, so
# the summary would be missing from the log when the script is source()d.
print(summary(ta2))

cat("\nTop 3 most prolific authors:\n")
# Walk the sorted counts from the end, limited to the number of authors that
# actually exist so fewer than three authors does not print blank entries.
top_idx <- rev(seq_along(ta))[seq_len(min(3L, length(ta)))]
for (i in top_idx) {
  cat(paste0(" ", names(ta)[i], ": ", ta[i], " recipes\n"))
}

###############################################################################
# Export data

# Stop diverting output; results.log is complete at this point.
sink()