Get the California League Game Data

First, figure out how to get Cal League data for a single date.

  ### Pull the schedule for one date, restricted to level id 14.
  games <- get_game_pks_mlb(date = "2022-09-03", level_ids = 14)

  ### Show only the game key, the start time, and the two team names.
  games %>%
    select(
      game_pk,
      gameDate,
      teams.away.team.name,
      teams.home.team.name
    )
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:12 PDT
## # A tibble: 17 × 4
##    game_pk gameDate             teams.away.team.name      teams.home.team.name  
##      <int> <chr>                <chr>                     <chr>                 
##  1  670189 2022-09-03T20:00:00Z Clearwater Threshers      Lakeland Flying Tigers
##  2  670190 2022-09-03T20:05:00Z Clearwater Threshers      Lakeland Flying Tigers
##  3  670082 2022-09-03T20:00:00Z Dunedin Blue Jays         Tampa Tarpons         
##  4  669868 2022-09-03T21:00:00Z Columbia Fireflies        Carolina Mudcats      
##  5  669623 2022-09-03T21:00:00Z Fayetteville Woodpeckers  Down East Wood Ducks  
##  6  670159 2022-09-03T22:00:00Z Daytona Tortugas          Palm Beach Cardinals  
##  7  669970 2022-09-03T22:05:00Z Salem Red Sox             Augusta GreenJackets  
##  8  669823 2022-09-03T22:05:00Z Myrtle Beach Pelicans     Charleston RiverDogs  
##  9  669577 2022-09-03T22:05:00Z Delmarva Shorebirds       Fredericksburg Nation…
## 10  670052 2022-09-03T22:10:00Z Jupiter Hammerheads       St. Lucie Mets        
## 11  669397 2022-09-03T22:30:00Z Kannapolis Cannon Ballers Lynchburg Hillcats    
## 12  670578 2022-09-03T22:30:00Z Fort Myers Mighty Mussels Bradenton Marauders   
## 13  671035 2022-09-04T00:05:00Z Visalia Rawhide           Lake Elsinore Storm   
## 14  669152 2022-09-04T01:50:00Z Modesto Nuts              Fresno Grizzlies      
## 15  670716 2022-09-04T02:05:00Z San Jose Giants           Stockton Ports        
## 16  671071 2022-09-04T02:05:00Z Rancho Cucamonga Quakes   Inland Empire 66ers   
## 17  670006 2022-09-03T22:30:00Z Dunedin Blue Jays         Tampa Tarpons
  ### Same listing, this time with the numeric id of each team alongside its name.
  games %>%
    select(
      game_pk, gameDate,
      teams.away.team.name, teams.away.team.id,
      teams.home.team.name, teams.home.team.id
    )
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:12 PDT
## # A tibble: 17 × 6
##    game_pk gameDate             teams.away.team.name     teams…¹ teams…² teams…³
##      <int> <chr>                <chr>                      <int> <chr>     <int>
##  1  670189 2022-09-03T20:00:00Z Clearwater Threshers         566 Lakela…     570
##  2  670190 2022-09-03T20:05:00Z Clearwater Threshers         566 Lakela…     570
##  3  670082 2022-09-03T20:00:00Z Dunedin Blue Jays            424 Tampa …     587
##  4  669868 2022-09-03T21:00:00Z Columbia Fireflies          3705 Caroli…     249
##  5  669623 2022-09-03T21:00:00Z Fayetteville Woodpeckers    3712 Down E…     485
##  6  670159 2022-09-03T22:00:00Z Daytona Tortugas             450 Palm B…     279
##  7  669970 2022-09-03T22:05:00Z Salem Red Sox                414 August…     478
##  8  669823 2022-09-03T22:05:00Z Myrtle Beach Pelicans        521 Charle…     233
##  9  669577 2022-09-03T22:05:00Z Delmarva Shorebirds          548 Freder…     436
## 10  670052 2022-09-03T22:10:00Z Jupiter Hammerheads          479 St. Lu…     507
## 11  669397 2022-09-03T22:30:00Z Kannapolis Cannon Balle…     487 Lynchb…     481
## 12  670578 2022-09-03T22:30:00Z Fort Myers Mighty Musse…     509 Braden…    3390
## 13  671035 2022-09-04T00:05:00Z Visalia Rawhide              516 Lake E…     103
## 14  669152 2022-09-04T01:50:00Z Modesto Nuts                 515 Fresno…     259
## 15  670716 2022-09-04T02:05:00Z San Jose Giants              476 Stockt…     524
## 16  671071 2022-09-04T02:05:00Z Rancho Cucamonga Quakes      526 Inland…     401
## 17  670006 2022-09-03T22:30:00Z Dunedin Blue Jays            424 Tampa …     587
## # … with abbreviated variable names ¹​teams.away.team.id, ²​teams.home.team.name,
## #   ³​teams.home.team.id
  ### Team ids read off the listing above.
  rcquakes_id <- 526 # Rancho Cucamonga Quakes
  elsinore_id <- 103 # Lake Elsinore Storm
  cal_league_ids <- c(
    526, 476, 515, 516, # Rancho Cucamonga, San Jose, Modesto, Visalia
    524, 401, 259, 103  # Stockton, Inland Empire, Fresno, Lake Elsinore
  )

  ### Keep a game when either the away side or the home side is one of those teams.
  games %>%
    filter(
      teams.away.team.id %in% cal_league_ids |
        teams.home.team.id %in% cal_league_ids
    )
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:12 PDT
## # A tibble: 4 × 61
##   game_pk link      gameT…¹ season gameD…² offic…³ resum…⁴ isTie gameN…⁵ publi…⁶
##     <int> <chr>     <chr>   <chr>  <chr>   <chr>   <chr>   <lgl>   <int> <lgl>  
## 1  671035 /api/v1.… R       2022   2022-0… 2022-0… <NA>    FALSE       1 TRUE   
## 2  669152 /api/v1.… R       2022   2022-0… 2022-0… <NA>    FALSE       1 TRUE   
## 3  670716 /api/v1.… R       2022   2022-0… 2022-0… <NA>    FALSE       1 TRUE   
## 4  671071 /api/v1.… R       2022   2022-0… 2022-0… <NA>    FALSE       1 TRUE   
## # … with 51 more variables: doubleHeader <chr>, gamedayType <chr>,
## #   tiebreaker <chr>, calendarEventID <chr>, seasonDisplay <chr>,
## #   dayNight <chr>, scheduledInnings <int>, reverseHomeAwayStatus <lgl>,
## #   inningBreakLength <int>, gamesInSeries <int>, seriesGameNumber <int>,
## #   seriesDescription <chr>, recordSource <chr>, ifNecessary <chr>,
## #   ifNecessaryDescription <chr>, rescheduledFrom <chr>,
## #   rescheduledFromDate <chr>, description <chr>, resumedFrom <chr>, …

Now grab the data for a bunch of dates.

  ### One Date per calendar day from 2022-04-08 through 2022-09-03.
  ### ymd() is provided by lubridate.
  game_dates <- seq(
    from = ymd("2022-04-08"),
    to = ymd("2022-09-03"),
    by = "day"
  )

  ### Next, fetch the games for every date. Non-game dates (Mondays) report
  ### "Invalid arguments" rather than returning games.
  ### Print the current time before the download starts.
  Sys.time()
## [1] "2022-09-05 13:06:13 PDT"
  ### Query level id 14 once per date, row-bind the results, and keep only
  ### games in which either team appears in cal_league_ids.
  games <- map_df(
    game_dates,
    function(game_date) get_game_pks_mlb(game_date, level_ids = 14)
  ) %>%
    filter(
      teams.away.team.id %in% cal_league_ids |
        teams.home.team.id %in% cal_league_ids
    )
## 2022-09-05 13:06:14: Invalid arguments provided
## 2022-09-05 13:06:15: Invalid arguments provided
## 2022-09-05 13:06:17: Invalid arguments provided
## 2022-09-05 13:06:19: Invalid arguments provided
## 2022-09-05 13:06:21: Invalid arguments provided
## 2022-09-05 13:06:22: Invalid arguments provided
## 2022-09-05 13:06:24: Invalid arguments provided
## 2022-09-05 13:06:27: Invalid arguments provided
## 2022-09-05 13:06:29: Invalid arguments provided
## 2022-09-05 13:06:31: Invalid arguments provided
## 2022-09-05 13:06:32: Invalid arguments provided
## 2022-09-05 13:06:34: Invalid arguments provided
## 2022-09-05 13:06:36: Invalid arguments provided
## 2022-09-05 13:06:38: Invalid arguments provided
## 2022-09-05 13:06:38: Invalid arguments provided
## 2022-09-05 13:06:39: Invalid arguments provided
## 2022-09-05 13:06:39: Invalid arguments provided
## 2022-09-05 13:06:40: Invalid arguments provided
## 2022-09-05 13:06:41: Invalid arguments provided
## 2022-09-05 13:06:43: Invalid arguments provided
## 2022-09-05 13:06:45: Invalid arguments provided
## 2022-09-05 13:06:47: Invalid arguments provided
## 2022-09-05 13:06:48: Invalid arguments provided
  Sys.time()
## [1] "2022-09-05 13:06:50 PDT"
  ### Spot-check the first 10 rows: team names and ids should all be Cal League.
  games %>%
    select(
      game_pk, gameDate,
      teams.away.team.name, teams.away.team.id,
      teams.home.team.name, teams.home.team.id
    ) %>%
    head(n = 10)
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:13 PDT
## # A tibble: 10 × 6
##    game_pk gameDate             teams.away.team.name    teams.…¹ teams…² teams…³
##      <int> <chr>                <chr>                      <int> <chr>     <int>
##  1  670707 2022-04-09T01:45:00Z Rancho Cucamonga Quakes      526 Visali…     516
##  2  671522 2022-04-09T01:50:00Z Stockton Ports               524 Fresno…     259
##  3  671220 2022-04-09T02:05:00Z Lake Elsinore Storm          103 Inland…     401
##  4  671037 2022-04-09T02:05:00Z San Jose Giants              476 Modest…     515
##  5  671036 2022-04-10T01:05:00Z San Jose Giants              476 Modest…     515
##  6  670708 2022-04-10T01:30:00Z Rancho Cucamonga Quakes      526 Visali…     516
##  7  671524 2022-04-10T01:50:00Z Stockton Ports               524 Fresno…     259
##  8  671217 2022-04-10T02:05:00Z Lake Elsinore Storm          103 Inland…     401
##  9  670712 2022-04-10T20:00:00Z Rancho Cucamonga Quakes      526 Visali…     516
## 10  671519 2022-04-10T20:05:00Z Stockton Ports               524 Fresno…     259
## # … with abbreviated variable names ¹​teams.away.team.id, ²​teams.home.team.name,
## #   ³​teams.home.team.id
  ### Fetch the extra per-game details: one get_game_info_mlb() call for each
  ### element of games$game_pk, with the results row-bound into game_info.
  ### The time is printed before and after the download.
  Sys.time()
## [1] "2022-09-05 13:06:50 PDT"
  game_info <- map_df(games[["game_pk"]], get_game_info_mlb)
  Sys.time()
## [1] "2022-09-05 13:11:55 PDT"
  ### Join the two tibbles on game_pk.
  ### game_info was built with one lookup per element of games$game_pk, so a
  ### game_pk that occurs more than once in games is repeated in game_info as
  ### well. Joining the two as-is matches every repeated key several times and
  ### duplicates rows (503 games became 509 rows). Collapsing game_info to one
  ### row per game_pk first guarantees exactly one output row per row of games.
  Sys.time()
## [1] "2022-09-05 13:11:55 PDT"
  games_2022 <- games %>%
    left_join(
      distinct(game_info, game_pk, .keep_all = TRUE),
      by = "game_pk"
    )
  Sys.time()
## [1] "2022-09-05 13:11:55 PDT"
  ### Compare dimensions before and after the join. A left join against a
  ### table holding one row per game_pk leaves the row count of games
  ### unchanged, so any extra rows in games_2022 mean that some game_pk
  ### matched more than one row of game_info.
  dim(games)
## [1] 503  70
  dim(game_info)
## [1] 503  18
  dim(games_2022)
## [1] 509  87
  ### Tally games by venue where Elsinore or the Quakes are the home team;
  ### the venues that come back are their home fields.
  games_2022 %>%
    filter(
      teams.home.team.id %in% c(elsinore_id, rcquakes_id)
    ) %>%
    count(
      venue.id,
      venue_name
    )
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:13 PDT
## # A tibble: 2 × 3
##   venue.id venue_name         n
##      <int> <chr>          <int>
## 1     2516 The Diamond       68
## 2     2854 LoanMart Field    60

Histograms of Run Distribution

Look at the distribution of runs by the different teams. We only really care about the Storm and the Quakes at LoanMart Field.

  ### Histogram of home-team runs (one-run bins), one panel per venue.
  ggplot(games_2022, aes(x = teams.home.score)) +
    geom_histogram(binwidth = 1) +
    facet_wrap(vars(venue_name)) +
    labs(
      title = "Home Team Score at Home",
      subtitle = "2022 Season"
    )
## Warning: Removed 4 rows containing non-finite values (stat_bin).

  ### Quakes home games only: runs scored by the Quakes, one facet row per opponent.
  games_2022 %>%
    filter(teams.home.team.id == rcquakes_id) %>%
    ggplot(aes(x = teams.home.score)) +
    geom_histogram(binwidth = 1) +
    facet_grid(teams.away.team.name ~ .) +
    labs(
      title = "Quakes Score vs Away Team",
      subtitle = "2022 Season"
    )

  ### Quakes home games only: runs scored by the visiting team, one facet row per opponent.
  games_2022 %>%
    filter(teams.home.team.id == rcquakes_id) %>%
    ggplot(aes(x = teams.away.score)) +
    geom_histogram(binwidth = 1) +
    facet_grid(teams.away.team.name ~ .) +
    labs(
      title = "Away Team Score vs Quakes",
      subtitle = "2022 Season"
    )

Compare Scores at LoanMart Field

  ### Number of Quakes home games for each (away runs, home runs) final score.
  ### The result stays grouped by both score columns.
  temp <- games_2022 %>%
    filter(teams.home.team.id == rcquakes_id) %>%
    group_by(teams.away.score, teams.home.score) %>%
    count()
  head(temp, n = 10)
## # A tibble: 10 × 3
## # Groups:   teams.away.score, teams.home.score [10]
##    teams.away.score teams.home.score     n
##               <int>            <int> <int>
##  1                0                8     1
##  2                1                2     1
##  3                1                3     1
##  4                1                4     1
##  5                1                7     1
##  6                1               10     1
##  7                1               18     1
##  8                2                1     1
##  9                2                5     1
## 10                2                6     1
  ### Heat map of the score counts: away runs on x, home runs on y, colour = n.
  ggplot(
    temp,
    aes(x = teams.away.score, y = teams.home.score, fill = n)
  ) +
    geom_raster(interpolate = FALSE) +
    scale_fill_distiller(palette = "Spectral")
## Warning: Raster pixels are placed at uneven vertical intervals and will be
## shifted. Consider using geom_tile() instead.

  ### Number of games for each venue and final score. count() already groups
  ### by the columns it is given, so a separate group_by() is unnecessary and
  ### would only leave temp grouped afterwards.
  temp <- games_2022 %>%
    count(venue_name, teams.away.score, teams.home.score)

  ### One tile heat map per venue: away runs on x, home runs on y, colour = n.
  temp %>%
    ggplot(aes(x = teams.away.score, y = teams.home.score, fill = n)) +
    geom_tile() +
    scale_fill_gradient(low = "red", high = "green") +
    facet_wrap("venue_name")
## Warning: Removed 2 rows containing missing values (geom_tile).

  ### Cross-tabulate Quakes home games: home (Quakes) runs by away runs.
  temp <- games_2022 %>%
    filter(teams.home.team.id == rcquakes_id) %>%
    select(teams.home.score, teams.away.score) %>%
    table()

  ### Or, use the tabulated values: melt the table into one row per
  ### (home score, away score) cell, with the cell count in `value`.
  melted_z_counts <- melt(temp)
  head(melted_z_counts)
##   teams.home.score teams.away.score value
## 1                1                0     0
## 2                2                0     0
## 3                3                0     0
## 4                4                0     0
## 5                5                0     0
## 6                6                0     0
  ### Give the melted columns readable names.
  colnames(melted_z_counts) <- c("Quakes_Runs", "Away_Runs", "Count")

  ### Tile heat map of the cell counts: Quakes runs on x, away runs on y.
  melted_z_counts %>%
    ggplot(aes(x = Quakes_Runs, y = Away_Runs, fill = Count)) +
    geom_tile() +
    scale_fill_gradient(low = "red", high = "green")

  ### The same counts drawn as filled contours.
  melted_z_counts %>%
    ggplot(aes(x = Quakes_Runs, y = Away_Runs, z = Count)) +
    geom_contour_filled()