Title: Getting NJ assessment data into R: part 4 in a series.
Date: 2015-04-21
Category: education
Tags: NJ, assessment, NJASK, HSPA, data_management, tutorial
Slug: reading-nj-assess-data-4
Author: Andrew Martin

```{r, echo=FALSE}
#SET THIS TO TRUE WHEN READY TO PUBLISH
ready_to_ship = TRUE

library(knitr)
#keep a handle on the plot hook that is registered right now, so the wrapper
#below can delegate to it
hook_plot <- knit_hooks$get('plot')

#replace the plot hook with a wrapper: when the chunk option `pelican.publish`
#is set and TRUE, prefix the plot path `x` with the literal "{filename}"
#(presumably so Pelican resolves it the same way as the {filename} links in
#the post body -- TODO confirm), then hand off to the saved hook
knit_hooks$set(plot=function(x, options) {
    if (!is.null(options$pelican.publish) && options$pelican.publish) {
        x <- paste0("{filename}", x)
    }
    hook_plot(x, options)
})
#chunk defaults: render plots with the 'Cairo_svg' device, and expose the
#publish flag to the plot hook as the `pelican.publish` chunk option
opts_chunk$set(dev='Cairo_svg')
opts_chunk$set(pelican.publish=ready_to_ship)

```

In my [last post]({filename}/06_njask-data-3.Rmd), I showed how to extend the functions I developed for NJASK data to other state assessments.  In this post, I'll tie everything together and write some general functions that bring a wide variety of NJ state assessment data into R.

Roughly speaking, we're trying to write a function that will return data given a `year` and a `grade`.  Here are the big things that need to happen:

1. Check that the call we made is a valid grade/year combination (raising an informative error if not)

2. Map the grade / year call to the correct `get_blank_data` function (NJASK? HSPA? GEPA?)

3. Fetch, clean, and return the data frame.

Let's tackle each of these in turn.<!-- PELICAN_END_SUMMARY -->

# valid calls

Before we do anything, let's source in the functions created in the previous two posts, including `fetch_njask()`, `fetch_hspa()`, and `fetch_gepa()`.

```{r source_prev}

#tangle each earlier post (tangle=TRUE writes out its R code instead of
#rendering it) and source the resulting script, so the functions defined
#there are available in this session
#NOTE(review): the link in the intro uses a `.Rmd` extension while these calls
#use `.rmd` -- confirm the file name case on case-sensitive file systems
knitr::knit('05_njask-data-2.rmd', tangle=TRUE)
source('05_njask-data-2.R')

knitr::knit('06_njask-data-3.rmd', tangle=TRUE)
source('06_njask-data-3.R')

```

This function will test if a year/grade call is valid.

```{r valid_call}

#Is there state assessment data for this year/grade combination?
#
#  year:  a single school year
#  grade: one or more grade levels
#
#Returns a logical: FALSE for years outside 2004-2014, otherwise one value
#per element of `grade` saying whether that grade was tested in `year`.
valid_call <- function(year, grade) {
  #data for 2015 school year doesn't exist yet
  #common core transition started in 2015 (njask is no more)
  if (year > 2014) {
    return(FALSE)
  }

  #assessment coverage 3:8 from 2006 on.
  #NJASK fully implemented in 2008
  if (year >= 2006) {
    return(grade %in% c(3:8, 11))
  }

  #2004 and 2005 only covered grades 3, 4, 8 and 11
  if (year >= 2004) {
    return(grade %in% c(3, 4, 8, 11))
  }

  #nothing before 2004
  FALSE
}

```

# map for retrieval

This function does normal retrieval (NJASK for 3-8; HSPA for 11).

```{r standard_grades}

#Standard retrieval: NJASK for grades 3-8, HSPA for grade 11.
#
#  year:  school year, passed through to the fetch function
#  grade: a single grade level
#
#Returns whatever `fetch_njask(year, grade)` or `fetch_hspa(year)` returns.
#Raises an error for any other grade (including NA), rather than failing
#later with "object 'assess_data' not found".
standard_assess <- function(year, grade) {
  if (grade %in% c(3:8)) {
    fetch_njask(year, grade)
  #`%in%` instead of `==` so an NA grade reaches the error below
  } else if (grade %in% 11) {
    fetch_hspa(year)
  } else {
    stop(
      "no standard assessment (NJASK or HSPA) is defined for grade ", grade,
      call. = FALSE
    )
  }
}

```

Here is a mapping function that calls the correct retrieval method, given grade and year.

```{r main_wrapper}

#Fetch NJ state assessment data for one grade/year combination.
#
#  year:  a single school year (2004 through 2014 pass `valid_call()`)
#  grade: a single grade level
#
#Which assessment is used:
#  2008 and later: NJASK 3rd-8th, HSPA 11th
#  2006 and 2007:  NJASK 3rd-7th, GEPA 8th, HSPA 11th
#  2004 and 2005:  NJASK 3rd & 4th, GEPA 8th, HSPA 11th
#
#Returns the result of the matching fetch function.
#Raises an error when `valid_call(year, grade)` rejects the combination, or
#when no fetch function matches.
fetch_nj_assess <- function(year, grade) {
  #only allow valid calls
  #(plain stop() so validation does not depend on an optional package or on
  #the pipe being attached)
  if (!all(valid_call(year, grade))) {
    stop("invalid grade/year parameter passed", call. = FALSE)
  }

  #the only departure from the standard mapping: before 2008, 8th graders
  #took the GEPA rather than the NJASK
  if (year < 2008 && grade == 8) {
    assess_data <- fetch_gepa(year)

  #everything else is NJASK (3rd-8th) or HSPA (11th); valid_call() has already
  #screened out grades that were not tested in the given year
  } else if (grade %in% c(3:8, 11)) {
    assess_data <- standard_assess(year, grade)

  } else {
    #if we ever reached this block, there's a problem with our `valid_call()` function
    stop("unable to match your grade/year parameters to the appropriate function.")
  }

  assess_data
}

```

Try it out:

```{r try_wrapper}

#fetch the 2014 grade 6 data, keep the columns from CDS_Code through
#TOTAL_POPULATION_LANGUAGE_ARTS_Scale_Score_Mean, and show the first rows
fetch_nj_assess(2014, 6) %>% select(CDS_Code:TOTAL_POPULATION_LANGUAGE_ARTS_Scale_Score_Mean) %>% head()

```

# all together

Finally, as a convenience, let's write a function that brings down all of the NJ assessment data (NJASK, GEPA, and HSPA) for every valid year and grade.

```{r all_njask}

#Fetch every available grade/year combination of NJ assessment data.
#
#Takes no arguments. Returns a named list with one element per grade/year
#combination, ordered from the most recent year to the oldest and by
#ascending grade within a year. Names look like 'nj2014gr3'. Each element is
#whatever `fetch_nj_assess()` returned for that combination.
fetch_all_nj <- function() {

  #grade coverage differs before and after 2006, so build the year x grade
  #grid for each era separately and stack them
  #(for a quick test, shrink these vectors to a single year and grade)
  calls <- rbind(
    expand.grid(year = c(2006:2014), grade = c(3:8, 11)),
    expand.grid(year = c(2004, 2005), grade = c(3, 4, 8, 11))
  )

  #sort: newest year first, then grade
  calls <- calls[order(-calls$year, calls$grade), ]

  #one key per grade/year combination, used to name the results
  row_keys <- paste0('nj', calls$year, 'gr', calls$grade)

  #to hold the results
  results <- vector("list", nrow(calls))
  names(results) <- row_keys

  #iterate over the grid and get the data
  for (i in seq_len(nrow(calls))) {
    #be verbose
    print(row_keys[i])

    #`results[i] <- list(...)` keeps the slot even if a fetch returns NULL
    results[i] <- list(fetch_nj_assess(calls$year[i], calls$grade[i]))
  }

  results
}

```

Test it:

```{r test_all_data}

#pull down everything: one fetch_nj_assess() call per grade/year combination
all_nj <- fetch_all_nj()

#how many grade/year data sets came back
length(all_nj)

```