What is the approximate spread of < 1980 Citroën model vehicles imported to New Zealand?
# Read the import totals from disk. The steps below use the MODEL, YEAR and
# COUNT columns of this data frame.
# attach() is deliberately not used: every later step receives `citroen`
# explicitly (through data masking or a `data =` argument), and attaching would
# only place a stale copy of the columns on the search path.
citroen <- read.csv("citroenTotals.csv", header = TRUE)
# Build the full MODEL x YEAR grid for 1930-2021 so that a model also gets a
# row for every year in which it has no entry in `citroen`.
complete_dat <- tidyr::expand(citroen, MODEL, YEAR = 1930:2021)
# https://stackoverflow.com/questions/65190609/cumulative-stacked-area-plot-for-counts-in-ggplot-with-r
# Join the counts onto the grid and compute a running total per model.
final_dat <- complete_dat %>%
  left_join(citroen, by = c("MODEL", "YEAR")) %>%
  replace_na(list(COUNT = 0)) %>% # Model/year pairs without data count as 0
  arrange(MODEL, YEAR) %>% # cumsum() below relies on chronological order
  group_by(MODEL) %>%
  mutate(aggcount = cumsum(COUNT)) %>%
  ungroup() # Do not leak the MODEL grouping into later steps
# Draw the stacked area chart of cumulative counts per model over the years.
ggplot(final_dat, aes(YEAR, aggcount, fill = MODEL)) +
  geom_area() +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    title = "Spread of < 1980 Citroën model vehicles imported to New Zealand between 1930 and 2021",
    x = "Year",
    y = "Cumulative Count",
    fill = "Model"
  ) +
  theme_ipsum() +
  # These overrides must come after theme_ipsum() so they are not replaced by it.
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16, hjust = 0.5),
    axis.title.y = element_text(size = 16, hjust = 0.5)
  )
# Sum COUNT within each MODEL and show the resulting table.
model_counts <- aggregate(COUNT ~ MODEL, data = citroen, FUN = sum)
print(model_counts)
## MODEL COUNT
## 1 2CV 28
## 2 5HP 4
## 3 AMI 6 4
## 4 B2 CADDY 1
## 5 C6 FAUX CABRIOLET 3
## 6 CX 21
## 7 DS 207
## 8 DYANE 17
## 9 GS 51
## 10 GT 1
## 11 GX X 50 2
## 12 H VAN 8
## 13 ID19 44
## 14 MEHARI 3
## 15 SM 15
## 16 TRACTION 15 SIX 283
## 17 TYPE H 1