First, figure out how to get Cal League data for a single date.
### Fetch every game on 2022-09-03 for level id 14 (the level whose listing
### includes the Cal League clubs), then show the matchups.
games <- get_game_pks_mlb(date = "2022-09-03", level_ids = 14)
select(games, game_pk, gameDate, teams.away.team.name, teams.home.team.name)
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:12 PDT
## # A tibble: 17 × 4
## game_pk gameDate teams.away.team.name teams.home.team.name
## <int> <chr> <chr> <chr>
## 1 670189 2022-09-03T20:00:00Z Clearwater Threshers Lakeland Flying Tigers
## 2 670190 2022-09-03T20:05:00Z Clearwater Threshers Lakeland Flying Tigers
## 3 670082 2022-09-03T20:00:00Z Dunedin Blue Jays Tampa Tarpons
## 4 669868 2022-09-03T21:00:00Z Columbia Fireflies Carolina Mudcats
## 5 669623 2022-09-03T21:00:00Z Fayetteville Woodpeckers Down East Wood Ducks
## 6 670159 2022-09-03T22:00:00Z Daytona Tortugas Palm Beach Cardinals
## 7 669970 2022-09-03T22:05:00Z Salem Red Sox Augusta GreenJackets
## 8 669823 2022-09-03T22:05:00Z Myrtle Beach Pelicans Charleston RiverDogs
## 9 669577 2022-09-03T22:05:00Z Delmarva Shorebirds Fredericksburg Nation…
## 10 670052 2022-09-03T22:10:00Z Jupiter Hammerheads St. Lucie Mets
## 11 669397 2022-09-03T22:30:00Z Kannapolis Cannon Ballers Lynchburg Hillcats
## 12 670578 2022-09-03T22:30:00Z Fort Myers Mighty Mussels Bradenton Marauders
## 13 671035 2022-09-04T00:05:00Z Visalia Rawhide Lake Elsinore Storm
## 14 669152 2022-09-04T01:50:00Z Modesto Nuts Fresno Grizzlies
## 15 670716 2022-09-04T02:05:00Z San Jose Giants Stockton Ports
## 16 671071 2022-09-04T02:05:00Z Rancho Cucamonga Quakes Inland Empire 66ers
## 17 670006 2022-09-03T22:30:00Z Dunedin Blue Jays Tampa Tarpons
### Same listing, now with the numeric team id next to each club name
games %>%
  select(
    game_pk,
    gameDate,
    teams.away.team.name,
    teams.away.team.id,
    teams.home.team.name,
    teams.home.team.id
  )
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:12 PDT
## # A tibble: 17 × 6
## game_pk gameDate teams.away.team.name teams…¹ teams…² teams…³
## <int> <chr> <chr> <int> <chr> <int>
## 1 670189 2022-09-03T20:00:00Z Clearwater Threshers 566 Lakela… 570
## 2 670190 2022-09-03T20:05:00Z Clearwater Threshers 566 Lakela… 570
## 3 670082 2022-09-03T20:00:00Z Dunedin Blue Jays 424 Tampa … 587
## 4 669868 2022-09-03T21:00:00Z Columbia Fireflies 3705 Caroli… 249
## 5 669623 2022-09-03T21:00:00Z Fayetteville Woodpeckers 3712 Down E… 485
## 6 670159 2022-09-03T22:00:00Z Daytona Tortugas 450 Palm B… 279
## 7 669970 2022-09-03T22:05:00Z Salem Red Sox 414 August… 478
## 8 669823 2022-09-03T22:05:00Z Myrtle Beach Pelicans 521 Charle… 233
## 9 669577 2022-09-03T22:05:00Z Delmarva Shorebirds 548 Freder… 436
## 10 670052 2022-09-03T22:10:00Z Jupiter Hammerheads 479 St. Lu… 507
## 11 669397 2022-09-03T22:30:00Z Kannapolis Cannon Balle… 487 Lynchb… 481
## 12 670578 2022-09-03T22:30:00Z Fort Myers Mighty Musse… 509 Braden… 3390
## 13 671035 2022-09-04T00:05:00Z Visalia Rawhide 516 Lake E… 103
## 14 669152 2022-09-04T01:50:00Z Modesto Nuts 515 Fresno… 259
## 15 670716 2022-09-04T02:05:00Z San Jose Giants 476 Stockt… 524
## 16 671071 2022-09-04T02:05:00Z Rancho Cucamonga Quakes 526 Inland… 401
## 17 670006 2022-09-03T22:30:00Z Dunedin Blue Jays 424 Tampa … 587
## # … with abbreviated variable names ¹teams.away.team.id, ²teams.home.team.name,
## # ³teams.home.team.id
### Team ids read off the listing above
rcquakes_id <- 526 # Rancho Cucamonga Quakes
elsinore_id <- 103 # Lake Elsinore Storm
cal_league_ids <- c(
  526, # Rancho Cucamonga Quakes
  476, # San Jose Giants
  515, # Modesto Nuts
  516, # Visalia Rawhide
  524, # Stockton Ports
  401, # Inland Empire 66ers
  259, # Fresno Grizzlies
  103  # Lake Elsinore Storm
)
### Keep a game when either club is one of the Cal League teams
games %>%
  filter(
    teams.home.team.id %in% cal_league_ids |
      teams.away.team.id %in% cal_league_ids
  )
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:12 PDT
## # A tibble: 4 × 61
## game_pk link gameT…¹ season gameD…² offic…³ resum…⁴ isTie gameN…⁵ publi…⁶
## <int> <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <int> <lgl>
## 1 671035 /api/v1.… R 2022 2022-0… 2022-0… <NA> FALSE 1 TRUE
## 2 669152 /api/v1.… R 2022 2022-0… 2022-0… <NA> FALSE 1 TRUE
## 3 670716 /api/v1.… R 2022 2022-0… 2022-0… <NA> FALSE 1 TRUE
## 4 671071 /api/v1.… R 2022 2022-0… 2022-0… <NA> FALSE 1 TRUE
## # … with 51 more variables: doubleHeader <chr>, gamedayType <chr>,
## # tiebreaker <chr>, calendarEventID <chr>, seasonDisplay <chr>,
## # dayNight <chr>, scheduledInnings <int>, reverseHomeAwayStatus <lgl>,
## # inningBreakLength <int>, gamesInSeries <int>, seriesGameNumber <int>,
## # seriesDescription <chr>, recordSource <chr>, ifNecessary <chr>,
## # ifNecessaryDescription <chr>, rescheduledFrom <chr>,
## # rescheduledFromDate <chr>, description <chr>, resumedFrom <chr>, …
Now grab the data for a bunch of dates.
### Every calendar day from 2022-04-08 through 2022-09-03 (ymd() comes from lubridate)
game_dates <- seq(from = ymd("2022-04-08"), to = ymd("2022-09-03"), by = "day")
### The schedule is fetched once per date below. Dates without games (e.g. Mondays)
### report "Invalid arguments". Sys.time() is printed before and after to time the loop.
Sys.time()
## [1] "2022-09-05 13:06:13 PDT"
### One schedule request per date, stacked row-wise; keep only games that
### involve at least one Cal League club.
games <- game_dates %>%
  map_df(function(day) get_game_pks_mlb(date = day, level_ids = 14)) %>%
  filter(
    teams.home.team.id %in% cal_league_ids |
      teams.away.team.id %in% cal_league_ids
  )
## 2022-09-05 13:06:14: Invalid arguments provided
## 2022-09-05 13:06:15: Invalid arguments provided
## 2022-09-05 13:06:17: Invalid arguments provided
## 2022-09-05 13:06:19: Invalid arguments provided
## 2022-09-05 13:06:21: Invalid arguments provided
## 2022-09-05 13:06:22: Invalid arguments provided
## 2022-09-05 13:06:24: Invalid arguments provided
## 2022-09-05 13:06:27: Invalid arguments provided
## 2022-09-05 13:06:29: Invalid arguments provided
## 2022-09-05 13:06:31: Invalid arguments provided
## 2022-09-05 13:06:32: Invalid arguments provided
## 2022-09-05 13:06:34: Invalid arguments provided
## 2022-09-05 13:06:36: Invalid arguments provided
## 2022-09-05 13:06:38: Invalid arguments provided
## 2022-09-05 13:06:38: Invalid arguments provided
## 2022-09-05 13:06:39: Invalid arguments provided
## 2022-09-05 13:06:39: Invalid arguments provided
## 2022-09-05 13:06:40: Invalid arguments provided
## 2022-09-05 13:06:41: Invalid arguments provided
## 2022-09-05 13:06:43: Invalid arguments provided
## 2022-09-05 13:06:45: Invalid arguments provided
## 2022-09-05 13:06:47: Invalid arguments provided
## 2022-09-05 13:06:48: Invalid arguments provided
Sys.time()
## [1] "2022-09-05 13:06:50 PDT"
### Spot-check the first ten rows: both clubs should be Cal League teams
games %>%
  head(10) %>%
  select(
    game_pk,
    gameDate,
    teams.away.team.name,
    teams.away.team.id,
    teams.home.team.name,
    teams.home.team.id
  )
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:13 PDT
## # A tibble: 10 × 6
## game_pk gameDate teams.away.team.name teams.…¹ teams…² teams…³
## <int> <chr> <chr> <int> <chr> <int>
## 1 670707 2022-04-09T01:45:00Z Rancho Cucamonga Quakes 526 Visali… 516
## 2 671522 2022-04-09T01:50:00Z Stockton Ports 524 Fresno… 259
## 3 671220 2022-04-09T02:05:00Z Lake Elsinore Storm 103 Inland… 401
## 4 671037 2022-04-09T02:05:00Z San Jose Giants 476 Modest… 515
## 5 671036 2022-04-10T01:05:00Z San Jose Giants 476 Modest… 515
## 6 670708 2022-04-10T01:30:00Z Rancho Cucamonga Quakes 526 Visali… 516
## 7 671524 2022-04-10T01:50:00Z Stockton Ports 524 Fresno… 259
## 8 671217 2022-04-10T02:05:00Z Lake Elsinore Storm 103 Inland… 401
## 9 670712 2022-04-10T20:00:00Z Rancho Cucamonga Quakes 526 Visali… 516
## 10 671519 2022-04-10T20:05:00Z Stockton Ports 524 Fresno… 259
## # … with abbreviated variable names ¹teams.away.team.id, ²teams.home.team.name,
## # ³teams.home.team.id
### Now get additional game info. We use the game_pk variable to retrieve the information.
Sys.time()
## [1] "2022-09-05 13:06:50 PDT"
### One game-info request per game_pk in `games`, stacked into a single tibble
game_info <- games %>%
  pull(game_pk) %>%
  map_df(get_game_info_mlb)
Sys.time()
## [1] "2022-09-05 13:11:55 PDT"
### Join the two tibbles on game_pk
Sys.time()
## [1] "2022-09-05 13:11:55 PDT"
### `games` lists some game_pk values more than once, so `game_info` (fetched
### once per row of `games`) repeats those rows as well. Joining the two as-is
### multiplies the repeats: the run recorded below shows 509 joined rows for
### 503 games. Keeping one info row per game_pk makes the join many-to-one, so
### games_2022 has exactly one row per row of `games`.
### NOTE(review): `games` still carries its own repeated game_pk rows --
### presumably the same game listed under more than one date; confirm whether
### those should be collapsed before counting or plotting.
games_2022 <- games %>%
  left_join(distinct(game_info, game_pk, .keep_all = TRUE), by = "game_pk")
Sys.time()
## [1] "2022-09-05 13:11:55 PDT"
### Compare shapes: left_join() keeps every row of `games` and only adds rows
### when a game_pk matches more than one row of `game_info`.
dim(games)
## [1] 503 70
dim(game_info)
## [1] 503 18
dim(games_2022)
## [1] 509 87
### Which venues host the home games of Lake Elsinore and the Quakes?
games_2022 %>%
  filter(
    teams.home.team.id == elsinore_id |
      teams.home.team.id == rcquakes_id
  ) %>%
  count(venue.id, venue_name)
## ── MLB Game Pks data from MLB.com ─────────────────────────── baseballr 1.2.0 ──
## ℹ Data updated: 2022-09-05 13:06:13 PDT
## # A tibble: 2 × 3
## venue.id venue_name n
## <int> <chr> <int>
## 1 2516 The Diamond 68
## 2 2854 LoanMart Field 60
Look at the distribution of runs scored by the different teams. We only really care about the Storm and the Quakes at LoanMart Field.
### Histogram of the home team's runs, one panel per venue
ggplot(games_2022, aes(x = teams.home.score)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(vars(venue_name)) +
  labs(
    title = "Home Team Score at Home",
    subtitle = "2022 Season"
  )
## Warning: Removed 4 rows containing non-finite values (stat_bin).
### Runs scored by the Quakes at home, one histogram row per visiting club
games_2022 %>%
  filter(teams.home.team.id == rcquakes_id) %>%
  ggplot(aes(x = teams.home.score)) +
  geom_histogram(binwidth = 1) +
  facet_grid(teams.away.team.name ~ .) +
  labs(
    title = "Quakes Score vs Away Team",
    subtitle = "2022 Season"
  )
### Runs scored by each visiting club in Quakes home games, one row per club
games_2022 %>%
  filter(teams.home.team.id == rcquakes_id) %>%
  ggplot(aes(x = teams.away.score)) +
  geom_histogram(binwidth = 1) +
  facet_grid(teams.away.team.name ~ .) +
  labs(
    title = "Away Team Score vs Quakes",
    subtitle = "2022 Season"
  )
### Tally Quakes home games by final score pair (away runs, home runs).
### count() with no arguments counts within the existing groups and keeps them.
temp <- games_2022 %>%
  filter(teams.home.team.id == rcquakes_id) %>%
  group_by(teams.away.score, teams.home.score) %>%
  count()
temp %>% head(10)
## # A tibble: 10 × 3
## # Groups: teams.away.score, teams.home.score [10]
## teams.away.score teams.home.score n
## <int> <int> <int>
## 1 0 8 1
## 2 1 2 1
## 3 1 3 1
## 4 1 4 1
## 5 1 7 1
## 6 1 10 1
## 7 1 18 1
## 8 2 1 1
## 9 2 5 1
## 10 2 6 1
### Heat map of how often each (away score, home score) pair occurred.
### geom_tile() is used instead of geom_raster(): the observed score pairs do
### not form an evenly spaced grid, and geom_raster() shifts pixels in that
### case (the warning recorded below came from the raster version).
temp %>%
  ggplot(aes(x = teams.away.score, y = teams.home.score, fill = n)) +
  geom_tile() +
  scale_fill_distiller(palette = "Spectral")
## Warning: Raster pixels are placed at uneven vertical intervals and will be
## shifted. Consider using geom_tile() instead.
### Count score pairs per venue, then draw one tile heat map per venue
temp <- games_2022 %>%
  group_by(venue_name, teams.away.score, teams.home.score) %>%
  count()
ggplot(temp, aes(x = teams.away.score, y = teams.home.score, fill = n)) +
  geom_tile() +
  scale_fill_gradient(low = "red", high = "green") +
  facet_wrap(vars(venue_name))
## Warning: Removed 2 rows containing missing values (geom_tile).
### Cross-tabulate Quakes home games: home score by away score
temp <- games_2022 %>%
  filter(teams.home.team.id == rcquakes_id) %>%
  select(teams.home.score, teams.away.score) %>%
  table()
### Or, use the tabulated values: reshape the table to one row per score pair
melted_z_counts <- melt(temp)
head(melted_z_counts)
## teams.home.score teams.away.score value
## 1 1 0 0
## 2 2 0 0
## 3 3 0 0
## 4 4 0 0
## 5 5 0 0
## 6 6 0 0
### Friendlier column names for plotting
melted_z_counts <- setNames(
  melted_z_counts,
  c("Quakes_Runs", "Away_Runs", "Count")
)
### Tile heat map of the score-pair counts
ggplot(melted_z_counts, aes(Quakes_Runs, Away_Runs, fill = Count)) +
  geom_tile() +
  scale_fill_gradient(low = "red", high = "green")
### Same counts drawn as filled contours
ggplot(melted_z_counts, aes(Quakes_Runs, Away_Runs, z = Count)) +
  geom_contour_filled()