### Step 0 - Download this file
#' If you left-click on the Homework 1 link it will open a tab in your browser that displays text.
#' You can copy and paste the content of that page into a new R script.
#' Or you can right-click on the Homework 1 link, click 'Save as', and save the file as an R script.

### Step 1 - Download the data
#' For this homework you will be working with the class data from STATLAB - Speeded Reaction Time.
#' You can download this data from the class website: http://www2.psych.purdue.edu/~gfrancis/Classes/PSY392/indexF19.html
#' Once at the website, navigate to the row containing August 26.
#' The data is located in the far right column of this row.
#' Download the data by clicking on '(class data)'.
#' If you are having trouble downloading the data you may need to right click on the class data link and select save link as.

### Step 2 - Set up working directories
#' Now that the data is downloaded it is important to set up a working directory.
#' You can do this however you like, but here is how I typically set up my folder structure for a class: first, I create a folder for the class, i.e., 'PSY 392'.
#' Inside my PSY 392 folder I create a folder for homework - i.e., 'HW'
#' Place this R script and the data file into this homework subfolder within your class folder.
#' When you use this R script in RStudio you will need to tell it what the working directory is.
#' To do this click on Session within the top menu bar.
#' Then click on Set Working Directory => Choose Directory.
#' Then navigate to your HW subfolder that you created, which is where this Rscript and the data should now be.
#' The point of all of this is to tell R where necessary files are located on your computer so that R can work with them.


### Step 3 - Read in data
#' Now you can check if your working directory has been set correctly.
getwd() # what is my current working directory?

#' Is the data file in your current working directory?
list.files() # what are the files in my current working directory?

# Read in the csv file that contains the class data - be sure to specify the correct file!
# Stop early with an actionable message if the file is not where R is looking;
# the usual cause is a working directory that has not been set (see Step 2).
dataFile <- "SimpleRT.csv"
if (!file.exists(dataFile)) {
  stop(
    "Cannot find '", dataFile, "' in ", getwd(),
    ". Set the working directory to the folder that contains the data file.",
    call. = FALSE
  )
}
HW1data <- read.csv(dataFile) # we are going to store the data into a variable called HW1data

### Step 4 - Getting to know the data
#' You should now see HW1data in the top right portion of RStudio in the Global Environment window.
#' It tells you that there are 600 observations (rows) of 2 variables (columns).
names(HW1data) # what are the names of the columns in my data?

#' Uh oh! We do not have a subject number column.
#' To fix this we can create a new column that creates an id number that repeats for the number of trials each participant completed
#' We know that each participant completed 50 trials. Thus, our subject id variable will repeat 50 times for each participant before changing.
#' The number of participants is the total number of observations divided by the trials per person.
#' For the class data that is 600 total observations / 50 observations per person = 12 people.
#' Computing this from the data (instead of typing 12) keeps the code working if the class size changes.
#' NOTE: this assumes the rows are ordered by participant, with each participant's trials stored together.
trialsPerSubject <- 50

# The row count must be a whole multiple of the trials per subject,
# otherwise the subject ids would not line up with the rows.
stopifnot(nrow(HW1data) %% trialsPerSubject == 0)
nSubjects <- nrow(HW1data) %/% trialsPerSubject

#' We then create a new column in our data frame for subjectNumbers
HW1data$subjectNumber <- rep(seq_len(nSubjects), each = trialsPerSubject)

# View() opens the RStudio data viewer, so only call it in an interactive session.
if (interactive()) {
  View(HW1data) # look at data and see that subject numbers repeat 50 times and then increase
}


### Step 5 - Descriptive statistics
#' If you are not sure how to do this in R then you should search for the information online.
#' An important part of programming is learning how to learn to program on your own.
#' Of course, if you are struggling please contact us or come to office hours.

#' Compute the following statistics for ReactionTime_ms:
summary(HW1data[["ReactionTime_ms"]]) # min, quartiles, median, mean and max in one call

#' Mean
mean(HW1data[["ReactionTime_ms"]])
#' Median
median(HW1data[["ReactionTime_ms"]])
#' Mode (not graded)
# Count how often each reaction time occurs, then report the value(s) with the highest count.
y <- table(HW1data[["ReactionTime_ms"]])
names(y)[y == max(y)]

# Alternative: sort the counts from most to least frequent and keep the first entry.
sort(table(HW1data[["ReactionTime_ms"]]), decreasing = TRUE)[1]
#' Standard deviation
sd(HW1data[["ReactionTime_ms"]])
#' Variance
var(HW1data[["ReactionTime_ms"]])
#' Minimum value
min(HW1data[["ReactionTime_ms"]])
#' Maximum value
max(HW1data[["ReactionTime_ms"]])
#' Range
# range() returns c(min, max); diff() of that pair is max - min.
diff(range(HW1data[["ReactionTime_ms"]]))


### Step 6 - Data visualization
#' Try your best to create a visualization of the data
#' Here is a link to some simple plots in R - https://www.statmethods.net/graphs/index.html
#' Provide reasoning for the type of plot you chose
#' Describe your visualization


# here are some examples of plotting this data
# Each plot gets explicit axis labels and a title; the defaults would show the
# raw R expression (e.g. "HW1data$ReactionTime_ms"), which is hard to read.

# scatterplot - maybe wanted to see if people got faster or slower as the experiment progressed
plot(ReactionTime_ms ~ Trial, data = HW1data,
     xlab = "Trial", ylab = "Reaction time (ms)",
     main = "Reaction time by trial")

# boxplot - gives useful information such as a visual of min, max, median, Q1, Q3, IQR, outliers
boxplot(HW1data$ReactionTime_ms,
        ylab = "Reaction time (ms)",
        main = "Distribution of reaction times")

# one box per trial - maybe some trials look weird?
boxplot(ReactionTime_ms ~ Trial, data = HW1data,
        xlab = "Trial", ylab = "Reaction time (ms)",
        main = "Reaction time distribution by trial")

# histogram - useful to get a sense of the data
hist(HW1data$ReactionTime_ms,
     xlab = "Reaction time (ms)",
     main = "Histogram of reaction times")

# kernel density estimate - a smoothed version of the histogram
plot(density(HW1data$ReactionTime_ms),
     xlab = "Reaction time (ms)",
     main = "Density of reaction times")


### Step 7 - Data tidying at the subject level
#' So far, we have looked at the data for every trial for every person.
#' However, we frequently care about how individual people or groups do.
#' Thus, we need to average across every trial for each participant to get the average reaction time for each person.

#' To do this I am going to use the tidyverse package
# Install the tidyverse only when it is missing, so that re-running this script
# does not download and reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse) # make sure the package is loaded.

#' The tidyverse uses the pipe symbol '%>%'
#' The shortcut for this is control + shift + m on pc or command + shift + m on mac

# Collapse the trial-level data to one row per subject holding that subject's mean RT.
subDataHW1 <- HW1data %>%                         # create a new data frame from the HW1data
  group_by(subjectNumber) %>%                     # that groups the data by subject number
  summarize(meanSubRT_ms = mean(ReactionTime_ms)) # then summarize the data by calculating the mean RT for each subject

### Step 8
#' Who was the fastest subject? Subject 10 meanRT = 272
#' Who was the slowest subject? Subject 11 meanRT = 440

### Simplest way to solve this problem would be to click on the data frame and sort the column.
### Of course you can write code to do this too.
# Order the subjects from the smallest (fastest) to the largest (slowest) mean RT.
sortedSubData <- subDataHW1[order(subDataHW1$meanSubRT_ms), ]
sortedSubData # view all data
sortedSubData[1, ] # view first row (fastest person)
# Index the last row with nrow() rather than a fixed number so this still
# picks the slowest person if the number of subjects changes.
sortedSubData[nrow(sortedSubData), ] # view last row (slowest person)


#' For these questions you can click on the dataframe and sort it to find who is fastest and slowest.

#' Create a visualization of the distribution of meanRTs at the subject level
#' Provide reasoning for the type of plot you chose
#' Describe your visualization

# Explicit labels and titles replace the defaults, which would otherwise show
# the raw R expression (e.g. "subDataHW1$meanSubRT_ms").

# boxplot - spread of the subject means
boxplot(subDataHW1$meanSubRT_ms,
        ylab = "Mean reaction time (ms)",
        main = "Distribution of subject mean RTs")

# kernel density estimate of the subject means
plot(density(subDataHW1$meanSubRT_ms),
     xlab = "Mean reaction time (ms)",
     main = "Density of subject mean RTs")

# histogram of the subject means
hist(subDataHW1$meanSubRT_ms,
     xlab = "Mean reaction time (ms)",
     main = "Histogram of subject mean RTs")

# one bar per subject; names.arg labels each bar with its subject number,
# without it the bars cannot be matched to subjects
barplot(subDataHW1$meanSubRT_ms,
        names.arg = subDataHW1$subjectNumber,
        xlab = "Subject number",
        ylab = "Mean reaction time (ms)",
        main = "Mean RT by subject")


library(tidyverse) # I actually use ggplot (which is a part of the tidyverse!) to make my plots

# Bar chart of each subject's mean reaction time with standard-error bars.
# Note that this uses the original trial-level HW1data: stat_summary() computes
# the per-subject mean (bars) and mean_se, the mean +/- standard error (error bars).
ggplot(HW1data, aes(x = as.factor(subjectNumber), y = ReactionTime_ms)) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  # The bar colour is set directly with `fill`: no fill aesthetic is mapped,
  # so a manual fill scale would have nothing to act on.
  stat_summary(fun = mean, geom = "bar", position = "dodge",
               fill = "#FF8682", color = "black", width = .825) +
  stat_summary(fun.data = mean_se, geom = "errorbar",
               position = position_dodge(width = .8), width = .2) +
  # label plot
  ggtitle("Subject Level Reaction Time") + # plot title
  ylab("Reaction Time") +                  # y axis label
  xlab("\nSubject Number") +               # x axis label (leading newline adds space above it)
  # The x tick labels come from the factor levels of subjectNumber,
  # so no explicit label vector is needed.
  theme(
    # titles
    plot.title   = element_text(size = 16, face = "bold", margin = margin(0, 0, 8, 0), hjust = 0.5),
    axis.title.x = element_text(size = 14, margin = margin(6, 0, 0, 0)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(0, 10, 0, 0)),
    # axes
    axis.text.x = element_text(size = 12, colour = "black", margin = margin(14, 0, 0, 0, "pt")),
    axis.text.y = element_text(size = 12, colour = "black", margin = margin(0, 8, 0, 0, "pt")),
    axis.line   = element_line(colour = "black"),
    # panel
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white"),
    # tick marks (a negative length draws the ticks inside the plot area)
    axis.ticks.length = unit(-.2, "cm"),
    axis.ticks.x      = element_blank(),
    axis.ticks.y      = element_line(colour = "black"),
    # legend
    legend.position = "none" # removes legend
  )