Presentation Exercise

About Data

The data was about the poll that was conducted from Sept. 15 to Sept. 25 among a sample of U.S. citizens that oversampled young, Black and Hispanic respondents, with 8,327 respondents, and was weighted according to general population benchmarks for U.S. citizens from the U.S.

This the original Image to be reproduced

Prompted chatgpt for the code: Prompt: Please generate R code to visualize demographic data, including variables such as race, income, age, and education, segmented by voter category. Ensure that each variable is plotted as a bar graph with custom colors for the voter categories and arrange the plots in a grid with two columns. After I adjusted the code to try fit what I wanted.

# Suppress warnings while loading libraries
suppressPackageStartupMessages({
  library(flextable)
  library(ggplot2)
  library(gridExtra)
  library(readr)
  library(dplyr)
  library(patchwork)
})

# Read the CSV file without displaying column types
suppressMessages({
  nonvoters <- read_csv("nonvoters_data.csv", show_col_types = FALSE)
})

# Create a new categorical variable 'Age_Category' based on age ranges using mutate and ifelse
nonvoters <- nonvoters %>%
  mutate(Age = ifelse(ppage >= 26 & ppage <= 34, "26-34",
                               ifelse(ppage >= 35 & ppage <= 49, "35-49",
                                      ifelse(ppage >= 50 & ppage <= 64, "50-64", "65+"))))

# creating new labels for the party ID represented by Q30
nonvoters <- nonvoters %>%
  mutate(Q30 = case_when(
    Q30 == 1 ~ "DEMOCRATIC",
    Q30 == 2 ~ "REPUBLICAN",
    Q30 == 3 ~ "INDEPENDENT",
    TRUE ~ as.character(Q30)
  ))

# Define custom colors for voter categories
custom_colors <- c("#ffbc00", "#e082ad", "#ae4dff")

# Plot race by voter category
plot_race <- ggplot(nonvoters, aes(x = race, fill = voter_category)) +
  geom_bar(position = "fill") +
  labs(x = "Race", y = "Percentage", fill = "Voter Category") +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  theme(legend.position = "", axis.title.y = element_blank(), axis.title.x = element_blank()) +
  coord_flip() +
  ggtitle("Race") +
  theme(plot.title = element_text(hjust = 0.5))

# Plot income by voter category
plot_income <- ggplot(nonvoters, aes(x = income_cat, fill = voter_category)) +
  geom_bar(position = "fill") +
  labs(x = "Income", y = "Percentage", fill = "Voter Category") +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  theme(legend.position = "", axis.title.y = element_blank(), axis.title.x = element_blank()) +
  coord_flip() +
  ggtitle("Income") +
  theme(plot.title = element_text(hjust = 0.5))

# Plot Age by voter category
plot_age <- ggplot(nonvoters, aes(x = Age, fill = voter_category)) +
  geom_bar(position = "fill") +
  labs(x = "Age", y = "Percentage", fill = "Voter Category") +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  theme(legend.position = "", axis.title.y = element_blank(), axis.title.x = element_blank()) +
  coord_flip() +
  ggtitle("Age") +
  theme(plot.title = element_text(hjust = 0.5))

# Plot education by voter category
plot_education <- ggplot(nonvoters, aes(x = educ, fill = voter_category)) +
  geom_bar(position = "fill") +
  labs(x = "Education", y = "Percentage", fill = "Voter Category") +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
  theme(legend.position = "", axis.title.y = element_blank(), axis.title.x = element_blank()) +
  coord_flip() +
  ggtitle("Education") +
  theme(plot.title = element_text(hjust = 0.5))

# Plot party id by voter category
plot_party <- ggplot(nonvoters, aes(x = Q30, fill = voter_category)) +
  geom_bar(position = "fill") +
  labs(x = "Party ID", y = "Percentage", fill = "") +
  scale_fill_manual(values = custom_colors) +
  theme_minimal() +
 theme(legend.position = "bottom", axis.title.y = element_blank(), axis.title.x = element_blank()) +
  coord_flip() +
  ggtitle("Party ID") +
  theme(plot.title = element_text(hjust = 0.5))

# Arrange plots in a grid
ggpubr::ggarrange   (plot_race + theme(aspect.ratio = 1),
          plot_income + theme(aspect.ratio = 1),
          plot_age + theme(aspect.ratio = 1),
          plot_education + theme(aspect.ratio = 1),
          plot_party + theme(aspect.ratio = 1))

Nonvoters were more likely to have lower incomes; to be young; to have lower levels of education.

# Read WNBA player stats data from CSV file
suppressWarnings(wnba_player_stats <- read_csv("wnba-player-stats.csv", show_col_types = FALSE))

About Data

The data used contains season-level advanced stats for WNBA players by team for the 1997-2019 seasons, from Basketball-Reference.com. It also contains Composite Rating, which blends PER and Win Shares per 40 into a single metric that mimics RAPTOR player ratings. I prompted chatgpt that produced a code, I didn’t use it but it guided to write mine.

The original table to be reproduced.

# Filter the data to include only players who played at least 375 minutes
filtered_data <- wnba_player_stats %>%
  filter(MP >= 375)

# Renaming variables to fit the table 
filtered_data <- filtered_data %>%
  rename(
    PLAYER = Player,
    SEASON = year_ID,
    AGE = Age,
    TEAM = Tm,
    WS40 = WS40,
    COMPOSITE_RATING = Composite_Rating
  )
# Arrange the data to find the best single-season composite ratings
top_players <- filtered_data %>%
  arrange(desc(COMPOSITE_RATING)) %>%
  select(PLAYER, SEASON, AGE, TEAM, PER, WS40, COMPOSITE_RATING) %>%
  head(15)

# Create flextable for the top players
ft <- flextable(top_players)

# Print the flextable
ft

PLAYER	SEASON	AGE	TEAM	PER	WS40	COMPOSITE_RATING
Lauren Jackson	2,006	25	SEA	34.7	0.416	11.5
Cynthia Cooper	1,998	35	HOU	31.1	0.382	10.7
Cynthia Cooper	1,997	34	HOU	32.2	0.385	10.6
Sheryl Swoopes	2,000	29	HOU	32.0	0.361	10.5
Lauren Jackson	2,007	26	SEA	35.0	0.373	10.4
Nneka Ogwumike	2,016	25	LAS	31.3	0.370	10.3
Sylvia Fowles	2,017	31	MIN	30.8	0.350	10.0
Elena Delle Donne	2,019	29	WAS	31.8	0.343	9.9
Elena Delle Donne	2,015	25	CHI	32.8	0.346	9.4
Lauren Jackson	2,003	22	SEA	32.1	0.333	9.2
Cynthia Cooper	1,999	36	HOU	29.5	0.335	9.2
Yolanda Griffith	1,999	29	SAC	31.9	0.330	9.1
Lauren Jackson	2,010	29	SEA	27.9	0.333	8.8
Tamika Catchings	2,007	27	IND	29.4	0.325	8.6
Nneka Ogwumike	2,017	26	LAS	28.1	0.321	8.5

The table shows Cooper-Dyke’s early WNBA seasons are among the best ever Best single-season composite ratings — based on a mix of player efficiency rating (PER) and win shares per 40 minutes — for WNBA players who played at least 375 minutes that season, 1997-2019