Attention conservaton notice: this post is just data munging to get a csv with HSPA metadata shipshape. Tried to set it to status: hidden so it wouldn't show on the front page of the blog, but had some trouble with Pelican. That said, work has to get done, so better to do it in an .Rmd and write down what I was thinking - I've learned that lesson like a thousand times. If you are in the small group of people who care about this sort of thing, get in touch.

First, read in the csv. This proved to be kind of a headache:

the format of the HSPA file layout is totally different from the NJASK file (sigh). The key difference is that the spanning headers that indicate the subgroup (gen ed, special ed, etc) are indicated as have start/end position across the whole relevant range in the NJASK file, but on the HSPA file they only show up as 6 character fields before the relevant range. I hand-edited the HSPA file so it would be consistent with the NJASK file.

NJASK:

HSPA:

This.... I can't say anything nice about the decision to do this (why?!?) so I'm just going to move on.

hspa <- readr::read_csv(paste0(final_path, "datasets/hspa_layout.csv"))

## Error: 'datasets/hspa_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(hspa) <- tolower(gsub(' ', '_', names(hspa)))

## Error in gsub(" ", "_", names(hspa)): object 'hspa' not found

head(hspa)

## Error in head(hspa): object 'hspa' not found

Let's rework the code from the first post into a proper function.

library(sqldf)
library(magrittr)
library(dplyr)
library(reshape2)

process_layout <- function(df) {
  require(sqldf)
  require(magrittr)
  require(dplyr)
  require(reshape2)  

  #split spanners from keepers
  spanners <- dplyr::filter(df, structural==TRUE)
  keepers <- dplyr::filter(df, structural==FALSE)

  #join spanners to keepers
  with_spanners <- sqldf('
    SELECT keepers.*
          ,spanners.data_type AS spanner
          ,spanners.field_length AS spanner_length
    FROM keepers
    LEFT OUTER JOIN spanners
      ON keepers.field_start_position >= spanners.field_start_position
     AND keepers.field_end_position <= spanners.field_end_position
  ')

  #tag the joined data frame with a row number to facilitate long -> wide
  with_rn <- with_spanners %>%
    dplyr::group_by(
       field_start_position, field_end_position, field_length, 
       data_type, description, comments, valid_values  
    ) %>%
    mutate(
      rn = order(desc(spanner_length)) 
    ) %>%
    select(
      field_start_position, field_end_position, field_length, 
      data_type, description, comments, valid_values, spanner, rn
    ) %>%
    as.data.frame()

  #text processing and mask NAs
  with_rn$rn <- paste0('spanner', with_rn$rn)
  with_rn$spanner <- ifelse(is.na(with_rn$spanner),'', with_rn$spanner)

  layout_wide <- dcast(
    data = with_rn,
    formula = field_start_position + field_end_position + field_length +
      data_type + description + comments + valid_values ~ rn,
    value.var = "spanner"
  )

  #this appears to be a bug in dcast?  should not be needed.
  layout_wide$spanner2 <- ifelse(is.na(layout_wide$spanner2),'', layout_wide$spanner2)

  reserved_chars <- list('+' = 'and', '(' = '', ')' = '')
  for (i in 1:length(reserved_chars)) {
    layout_wide$spanner1 <- gsub(
      names(reserved_chars)[i], 
      reserved_chars[i], 
      layout_wide$spanner1, 
      fixed = TRUE
    )
  }

  #make final name
  layout_wide$final_name <- layout_wide %$% paste(spanner1, spanner2, description, sep='_')

  #kill double underscores
  layout_wide$final_name <- gsub('__', '_', layout_wide$final_name)
  #kill leading or trailer underscores
  layout_wide$final_name <- gsub("(^_+|_+$)", "", layout_wide$final_name)
  #trim any remaining whitespace
  layout_wide$final_name <- gsub("^\\s+|\\s+$", "", layout_wide$final_name)
  #all whitespace becomes underscore
  layout_wide$final_name <- gsub(' ', '_', layout_wide$final_name)

  #more whitespace cleanup
  layout_wide$comments <- gsub("^\\s+|\\s+$", "", layout_wide$comments)
  layout_wide$description <- gsub("^\\s+|\\s+$", "", layout_wide$description)
  layout_wide$valid_values <- gsub("^\\s+|\\s+$", "", layout_wide$valid_values)

  return(layout_wide)
}

layout_hspa <- process_layout(hspa)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'hspa' not found

layout_hspa %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  9            9      Text
## 3                    1                  2            2      Text
## 4                    3                  6            4      Text
## 5                    7                  9            3      Text
## 6                   10                 59           50      Text
##     description                                              comments
## 1    RECORD KEY                                                      
## 2      CDS Code                                                      
## 3   County Code                                                      
## 4 District Code Applicable only for district and school aggregations.
## 5   School Code              Applicable only for school aggregations.
## 6   County Name                                                      
##                                                                                                                                                                                               valid_values
## 1                                                                                                                                                                                                         
## 2 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES: NS = Non-Special Needs; SN = Special Needs; ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 3                                                                                                       01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 80
## 4                                                                                                                                                                                      0100 to 9999, blank
## 5                                                                                                                                                                                        001 to 999, blank
## 6                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                      RECORD_KEY
## 2                        CDS_Code
## 3                     County_Code
## 4                   District_Code
## 5                     School_Code
## 6                     County_Name

Finally, save the hspa layout as an .rda file.

save(layout_hspa, file = paste0(final_path,'datasets/hspa_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## hspa_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

old HSPA

Before 2010 the HSPA used a different layout. Prep that layout file:

Read:

hspa2010 <- readr::read_csv(paste0(final_path, "datasets/hspa2010_layout.csv"))

## Error: 'datasets/hspa2010_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(hspa2010) <- tolower(gsub(' ', '_', names(hspa2010)))

## Error in gsub(" ", "_", names(hspa2010)): object 'hspa2010' not found

head(hspa2010)

## Error in head(hspa2010): object 'hspa2010' not found

One note here: the layout at flat file has headers for science, even though it isn't on the HSPA. I'm omitting the last set of science headers in my layout file, because the raw files on the state website don't appear to be padded with enough blanks for the last NA slots.

Process:

layout_hspa2010 <- process_layout(hspa2010)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'hspa2010' not found

layout_hspa2010 %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  9            9      Text
## 3                    1                  2            2      Text
## 4                    3                  6            4      Text
## 5                    7                  9            3      Text
## 6                   10                 59           50      Text
##     description                                              comments
## 1    RECORD KEY                                                      
## 2      CDS Code                                                      
## 3   County Code                                                      
## 4 District Code Applicable only for district and school aggregations.
## 5   School Code              Applicable only for school aggregations.
## 6   County Name                                                      
##                                                                                                                                                                                               valid_values
## 1                                                                                                                                                                                                         
## 2 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES: NS = Non-Special Needs; SN = Special Needs; ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 3                                                                                                       01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 80
## 4                                                                                                                                                                                      0100 to 9999, blank
## 5                                                                                                                                                                                        001 to 999, blank
## 6                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                      RECORD_KEY
## 2                        CDS_Code
## 3                     County_Code
## 4                   District_Code
## 5                     School_Code
## 6                     County_Name

Save:

save(layout_hspa2010, file = paste0(final_path, 'datasets/hspa2010_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## hspa2010_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

GEPA

Now do the same for the GEPA layout, using the functions developed above:

Read:

gepa <- readr::read_csv(paste0(final_path, 'datasets/gepa_layout.csv'))

## Error: 'datasets/gepa_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(gepa) <- tolower(gsub(' ', '_', names(gepa)))

## Error in gsub(" ", "_", names(gepa)): object 'gepa' not found

head(gepa)

## Error in head(gepa): object 'gepa' not found

Process:

layout_gepa <- process_layout(gepa)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'gepa' not found

layout_gepa %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  2            2      Text
## 3                    3                  6            4      Text
## 4                    7                  9            3      Text
## 5                   10                 59           50      Text
## 6                   60                109           50      Text
##     description comments
## 1      CDS Code         
## 2   County Code         
## 3 District Code         
## 4   School Code         
## 5   County Name         
## 6 District Name         
##                                                                                                                                                                                               valid_values
## 1 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES: NS = Non-Special Needs; SN = Special Needs; ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 2                                                                        01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 80, ST, A, B, CD, DE, FG, GH, I, J, R, NS, SN
## 3                                                                                                                                      0100 to 9999;  Applicable only for district and school aggregations
## 4                                                                                                                                                     001 to 999;  Applicable only for school aggregations
## 5                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
## 6                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                        CDS_Code
## 2                     County_Code
## 3                   District_Code
## 4                     School_Code
## 5                     County_Name
## 6                   District_Name

Save:

save(layout_gepa, file = paste0(final_path, 'datasets/gepa_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## gepa_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

old NJASK

repeat for old NJASK

Read:

njask05 <- readr::read_csv(paste0(final_path, 'datasets/njask2005_layout.csv'))

## Error: 'datasets/njask2005_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(njask05) <- tolower(gsub(' ', '_', names(njask05)))

## Error in gsub(" ", "_", names(njask05)): object 'njask05' not found

head(njask05)

## Error in head(njask05): object 'njask05' not found

Process:

layout_njask05 <- process_layout(njask05)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'njask05' not found

layout_njask05 %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  2            2      Text
## 3                    3                  6            4      Text
## 4                    7                  9            3      Text
## 5                   10                 59           50      Text
## 6                   60                109           50      Text
##     description comments
## 1      CDS Code         
## 2   County Code         
## 3 District Code         
## 4   School Code         
## 5   County Name         
## 6 District Name         
##                                                                                                                                                                                               valid_values
## 1 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES: NS = Non-Special Needs; SN = Special Needs; ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 2                                                                                                   01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 80, 90
## 3                                                                                                                                                                                             0100 to 9999
## 4                                                                                                                                                                                               001 to 999
## 5                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
## 6                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                        CDS_Code
## 2                     County_Code
## 3                   District_Code
## 4                     School_Code
## 5                     County_Name
## 6                   District_Name

layout_njask05 %>% tail()

##     field_start_position field_end_position field_length data_type
## 430                 2283               2288            6   Integer
## 431                 2289               2292            4   Decimal
## 432                 2293               2296            4   Decimal
## 433                 2297               2300            4   Decimal
## 434                 2301               2304            4   Decimal
## 435                 2305               2306            2   Integer
##                         description                  comments
## 430    Number of Valid Scale Scores                          
## 431 Partially Proficient Percentage       One implied decimal
## 432           Proficient Percentage       One implied decimal
## 433  Advanced Proficient Percentage       One implied decimal
## 434                Scale Score Mean       One implied decimal
## 435                           Grade Located in text file only
##       valid_values spanner1                       spanner2
## 430   0 or greater  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 431   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 432   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 433   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 434 100.0 to 300.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 435         03, 04                                        
##                                                                 final_name
## 430    SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Number_of_Valid_Scale_Scores
## 431 SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Partially_Proficient_Percentage
## 432           SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Proficient_Percentage
## 433  SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Advanced_Proficient_Percentage
## 434                SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Scale_Score_Mean
## 435                                                                  Grade

Save:

save(layout_njask05, file = paste0(final_path, 'datasets/njask05_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## njask05_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

(2004) old NJASK

once more for 2004 NJASK

Read:

njask04 <- readr::read_csv(paste0(final_path, 'datasets/njask2004_layout.csv'))

## Error: 'datasets/njask2004_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(njask04) <- tolower(gsub(' ', '_', names(njask04)))

## Error in gsub(" ", "_", names(njask04)): object 'njask04' not found

head(njask04)

## Error in head(njask04): object 'njask04' not found

Process:

layout_njask04 <- process_layout(njask04)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'njask04' not found

layout_njask04 %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  2            2      Text
## 3                    3                  6            4      Text
## 4                    7                  9            3      Text
## 5                   10                 59           50      Text
## 6                   60                109           50      Text
##     description comments
## 1      CDS Code         
## 2   County Code         
## 3 District Code         
## 4   School Code         
## 5   County Name         
## 6 District Name         
##                                                                                                                                                                                               valid_values
## 1 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES: NS = Non-Special Needs; SN = Special Needs; ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 2                                                                                                   01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 80, 90
## 3                                                                                                                                                                                             0100 to 9999
## 4                                                                                                                                                                                               001 to 999
## 5                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
## 6                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                        CDS_Code
## 2                     County_Code
## 3                   District_Code
## 4                     School_Code
## 5                     County_Name
## 6                   District_Name

layout_njask04 %>% tail()

##     field_start_position field_end_position field_length data_type
## 357                 1869               1874            6   Integer
## 358                 1875               1878            4   Decimal
## 359                 1879               1882            4   Decimal
## 360                 1883               1886            4   Decimal
## 361                 1887               1890            4   Decimal
## 362                 1891               1892            2   Integer
##                         description                  comments
## 357    Number of Valid Scale Scores                          
## 358 Partially Proficient Percentage       One implied decimal
## 359           Proficient Percentage       One implied decimal
## 360  Advanced Proficient Percentage       One implied decimal
## 361                Scale Score Mean       One implied decimal
## 362                           Grade Located in text file only
##       valid_values spanner1                       spanner2
## 357   0 or greater  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 358   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 359   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 360   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 361 100.0 to 300.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 362         03, 04                                        
##                                                                 final_name
## 357    SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Number_of_Valid_Scale_Scores
## 358 SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Partially_Proficient_Percentage
## 359           SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Proficient_Percentage
## 360  SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Advanced_Proficient_Percentage
## 361                SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Scale_Score_Mean
## 362                                                                  Grade

Save:

save(layout_njask04, file = paste0(final_path, 'datasets/njask04_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## njask04_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

2007 gr 3 NJASK

this one used a slightly different layout.

Read:

njask07gr3 <- readr::read_csv(paste0(final_path, 'datasets/njask2007gr3_layout.csv'))

## Error: 'datasets/njask2007gr3_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(njask07gr3) <- tolower(gsub(' ', '_', names(njask07gr3)))

## Error in gsub(" ", "_", names(njask07gr3)): object 'njask07gr3' not found

head(njask07gr3)

## Error in head(njask07gr3): object 'njask07gr3' not found

Process:

layout_njask07gr3 <- process_layout(njask07gr3)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'njask07gr3' not found

layout_njask07gr3 %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  2            2      Text
## 3                    3                  6            4      Text
## 4                    7                  9            3      Text
## 5                   10                 59           50      Text
## 6                   60                109           50      Text
##     description comments
## 1      CDS Code         
## 2   County Code         
## 3 District Code         
## 4   School Code         
## 5   County Name         
## 6 District Name         
##                                                                                                                                                                                               valid_values
## 1 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES: NS = Non-Special Needs; SN = Special Needs; ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 2                                                                                                   01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 80, 90
## 3                                                                                                                                      0100 to 9999;  Applicable only for district and school aggregations
## 4                                                                                                                                                      001 to 999; Applicable only for school aggregations
## 5                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
## 6                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                        CDS_Code
## 2                     County_Code
## 3                   District_Code
## 4                     School_Code
## 5                     County_Name
## 6                   District_Name

layout_njask07gr3 %>% tail()

##     field_start_position field_end_position field_length data_type
## 481                 2541               2544            4   Decimal
## 482                 2545               2548            4   Decimal
## 483                 2549               2552            4   Decimal
## 484                 2553               2556            4   Decimal
## 485                 2557               2558            2   Integer
## 486                 2559               2562            4   Integer
##                         description                  comments
## 481 Partially Proficient Percentage       One implied decimal
## 482           Proficient Percentage       One implied decimal
## 483  Advanced Proficient Percentage       One implied decimal
## 484                Scale Score Mean       One implied decimal
## 485                     Grade Level Located in text file only
## 486                       Test Year                          
##                                                                                                                                                                         valid_values
## 481   0.0 to 100.0 for spreadsheet and 0000 to 1000 for text file.  Blank if Number Enrolled = 0 or if Number of Valid Scales Scores = 0 for both unsuppressed and suppressed files.
## 482   0.0 to 100.0 for spreadsheet and 0000 to 1000 for text file.  Blank if Number Enrolled = 0 or if Number of Valid Scales Scores = 0 for both unsuppressed and suppressed files.
## 483   0.0 to 100.0 for spreadsheet and 0000 to 1000 for text file.  Blank if Number Enrolled = 0 or if Number of Valid Scales Scores = 0 for both unsuppressed and suppressed files.
## 484 100.0 to 300.0 for spreadsheet and 1000 to 3000 for text file.  Blank if Number Enrolled = 0 or if Number of Valid Scales Scores = 0 for both unsuppressed and suppressed files.
## 485                                                                                                                                                                           03, 04
## 486                                                                                                                                                                             2007
##                           spanner1 spanner2
## 481 NON-ECONOMICALLY DISADVANTAGED  SCIENCE
## 482 NON-ECONOMICALLY DISADVANTAGED  SCIENCE
## 483 NON-ECONOMICALLY DISADVANTAGED  SCIENCE
## 484 NON-ECONOMICALLY DISADVANTAGED  SCIENCE
## 485                                        
## 486                                        
##                                                                 final_name
## 481 NON-ECONOMICALLY_DISADVANTAGED_SCIENCE_Partially_Proficient_Percentage
## 482           NON-ECONOMICALLY_DISADVANTAGED_SCIENCE_Proficient_Percentage
## 483  NON-ECONOMICALLY_DISADVANTAGED_SCIENCE_Advanced_Proficient_Percentage
## 484                NON-ECONOMICALLY_DISADVANTAGED_SCIENCE_Scale_Score_Mean
## 485                                                            Grade_Level
## 486                                                              Test_Year

Save:

save(layout_njask07gr3, file = paste0(final_path, 'datasets/njask07gr3_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## njask07gr3_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

2006 gr 3 NJASK

sooooo many little changes to these layouts.

Read:

njask06gr3 <- readr::read_csv(paste0(final_path, 'datasets/njask2006gr3_layout.csv'))

## Error: 'datasets/njask2006gr3_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(njask06gr3) <- tolower(gsub(' ', '_', names(njask06gr3)))

## Error in gsub(" ", "_", names(njask06gr3)): object 'njask06gr3' not found

head(njask06gr3)

## Error in head(njask06gr3): object 'njask06gr3' not found

Process:

layout_njask06gr3 <- process_layout(njask06gr3)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'njask06gr3' not found

layout_njask06gr3 %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  2            2      Text
## 3                    3                  6            4      Text
## 4                    7                  9            3      Text
## 5                   10                 59           50      Text
## 6                   60                109           50      Text
##     description comments
## 1      CDS Code         
## 2   County Code         
## 3 District Code         
## 4   School Code         
## 5   County Name         
## 6 District Name         
##                                                                                                                                                                                               valid_values
## 1 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES: NS = Non-Special Needs; SN = Special Needs; ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 2                                                                                                   01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 80, 90
## 3                                                                                                                                                                                             0100 to 9999
## 4                                                                                                                                                                                               001 to 999
## 5                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
## 6                                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                        CDS_Code
## 2                     County_Code
## 3                   District_Code
## 4                     School_Code
## 5                     County_Name
## 6                   District_Name

layout_njask06gr3 %>% tail()

##     field_start_position field_end_position field_length data_type
## 455                 2409               2414            6   Integer
## 456                 2415               2418            4   Decimal
## 457                 2419               2422            4   Decimal
## 458                 2423               2426            4   Decimal
## 459                 2427               2430            4   Decimal
## 460                 2431               2432            2   Integer
##                         description                  comments
## 455    Number of Valid Scale Scores                          
## 456 Partially Proficient Percentage       One implied decimal
## 457           Proficient Percentage       One implied decimal
## 458  Advanced Proficient Percentage       One implied decimal
## 459                Scale Score Mean       One implied decimal
## 460                           Grade Located in text file only
##       valid_values spanner1                       spanner2
## 455   0 or greater  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 456   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 457   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 458   0.0 to 100.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 459 100.0 to 300.0  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 460         03, 04                                        
##                                                                 final_name
## 455    SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Number_of_Valid_Scale_Scores
## 456 SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Partially_Proficient_Percentage
## 457           SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Proficient_Percentage
## 458  SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Advanced_Proficient_Percentage
## 459                SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Scale_Score_Mean
## 460                                                                  Grade

Save:

save(layout_njask06gr3, file = paste0(final_path, 'datasets/njask06gr3_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## njask06gr3_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

2006 gr 5 NJASK

sigh.

Read:

njask06gr5 <- readr::read_csv(paste0(final_path, 'datasets/njask2006gr5_layout.csv'))

## Error: 'datasets/njask2006gr5_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(njask06gr5) <- tolower(gsub(' ', '_', names(njask06gr5)))

## Error in gsub(" ", "_", names(njask06gr5)): object 'njask06gr5' not found

head(njask06gr5)

## Error in head(njask06gr5): object 'njask06gr5' not found

Process:

layout_njask06gr5 <- process_layout(njask06gr5)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'njask06gr5' not found

layout_njask06gr5 %>% head()

##   field_start_position field_end_position field_length data_type
## 1                    1                  9            9      Text
## 2                    1                  9            9      Text
## 3                    1                  2            2      Text
## 4                    3                  6            4      Text
## 5                    7                  9            3      Text
## 6                   10                 59           50      Text
##     description                                              comments
## 1    RECORD KEY                                                      
## 2      CDS Code                                                      
## 3   County Code                                                      
## 4 District Code Applicable only for district and school aggregations.
## 5   School Code              Applicable only for school aggregations.
## 6   County Name                                                      
##                                                                                                                                                                               valid_values
## 1                                                                                                                                                                                         
## 2 CDS codes for schools and districts\nTHE FIRST TWO POSITIONS WILL INCLUDE THE FOLLOWING AGGREGATION CODES IN LIEU OF THE COUNTY CODE:  ST = State; A = DFG A; B = DFG B; CD= DFG CD\x85.
## 3                                                                                   01, 03, 05, 07, 09, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 80, 90
## 4                                                                                                                                                                      0100 to 9999, blank
## 5                                                                                                                                                                        001 to 999, blank
## 6                                                                                                                     A to Z, blank;  Applicable only for district and school aggregations
##   spanner1 spanner2    final_name
## 1                      RECORD_KEY
## 2                        CDS_Code
## 3                     County_Code
## 4                   District_Code
## 5                     School_Code
## 6                     County_Name

layout_njask06gr5 %>% tail()

##     field_start_position field_end_position field_length data_type
## 458                 2403               2408            6   Integer
## 459                 2409               2414            6   Integer
## 460                 2415               2418            4   Decimal
## 461                 2419               2422            4   Decimal
## 462                 2423               2426            4   Decimal
## 463                 2427               2430            4   Decimal
##                         description            comments valid_values
## 458                 Number of Voids                            Blank
## 459    Number of Valid Scale Scores                            Blank
## 460 Partially Proficient Percentage One implied decimal        Blank
## 461           Proficient Percentage One implied decimal        Blank
## 462  Advanced Proficient Percentage One implied decimal        Blank
## 463                Scale Score Mean One implied decimal        Blank
##     spanner1                       spanner2
## 458  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 459  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 460  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 461  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 462  SCIENCE NON-ECONOMICALLY DISADVANTAGED
## 463  SCIENCE NON-ECONOMICALLY DISADVANTAGED
##                                                                 final_name
## 458                 SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Number_of_Voids
## 459    SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Number_of_Valid_Scale_Scores
## 460 SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Partially_Proficient_Percentage
## 461           SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Proficient_Percentage
## 462  SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Advanced_Proficient_Percentage
## 463                SCIENCE_NON-ECONOMICALLY_DISADVANTAGED_Scale_Score_Mean

Save:

save(layout_njask06gr5, file = paste0(final_path, 'datasets/njask06gr5_layout.rda'))

## Warning in gzfile(file, "wb"): cannot open compressed file 'datasets/
## njask06gr5_layout.rda', probable reason 'No such file or directory'

## Error in gzfile(file, "wb"): cannot open the connection

2009 NJASK

Read:

njask09 <- readr::read_csv(paste0(final_path, 'datasets/njask2009_layout.csv'))

## Error: 'datasets/njask2009_layout.csv' does not exist in current working directory ('/Users/almartin/Google Drive/repositories/almart.in-source/content/pages').

names(njask09) <- tolower(gsub(' ', '_', names(njask09)))

## Error in gsub(" ", "_", names(njask09)): object 'njask09' not found

head(njask09)

## Error in head(njask09): object 'njask09' not found

Process:

layout_njask09 <- process_layout(njask09)

## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'njask09' not found

layout_njask09 %>% head()

## Error in eval(expr, envir, enclos): object 'layout_njask09' not found

layout_njask09 %>% tail()

## Error in eval(expr, envir, enclos): object 'layout_njask09' not found

Save:

save(layout_njask09, file = paste0(final_path, 'datasets/njask09_layout.rda'))

## Error in save(layout_njask09, file = paste0(final_path, "datasets/njask09_layout.rda")): object 'layout_njask09' not found