Subject Matter Question

What is the approximate spread of pre-1980 Citroën models imported to New Zealand?

Results

Methodology

Data Collection

Data Cleaning

Load and prepare data

# Read in the dataset (the MODEL, YEAR and COUNT columns are used by the
# analysis). The data frame is intentionally not attach()ed: the steps in this
# document always refer to `citroen` explicitly, and attaching would place its
# columns on the search path where they can mask, or be masked by, other
# objects of the same name.
citroen <- read.csv("citroenTotals.csv", header = TRUE)

# Build every MODEL x YEAR combination for 1930-2021, so that each model has a
# row for every year even when `citroen` has no entry for it
complete_dat <- tidyr::expand(citroen, MODEL, YEAR = 1930:2021)

# Running total of COUNT per model, adapted from
# https://stackoverflow.com/questions/65190609/cumulative-stacked-area-plot-for-counts-in-ggplot-with-r
final_dat <- complete_dat %>%
  left_join(citroen, by = c("MODEL", "YEAR")) %>%
  replace_na(list(COUNT = 0)) %>%  # Combinations with no data count as zero
  arrange(MODEL, YEAR) %>%  # cumsum() below relies on chronological order
  group_by(MODEL) %>%  # Accumulate within each model separately
  mutate(aggcount = cumsum(COUNT)) %>%
  ungroup()  # Do not leak the MODEL grouping into later steps

# Draw a stacked area chart of the cumulative counts, one band per model
ggplot(final_dat, aes(x = YEAR, y = aggcount, fill = MODEL)) +
  geom_area() +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    title = "Spread of < 1980 Citroën model vehicles imported to New Zealand between 1930 and 2021",
    x = "Year",
    y = "Cumulative Count",
    fill = "Model"
  ) +
  theme_ipsum() +
  # Applied after theme_ipsum() so these text sizes and the centred axis
  # titles (hjust = 0.5) take precedence over the base theme
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16, hjust = 0.5),
    axis.title.y = element_text(size = 16, hjust = 0.5)
  )

Output summary counts

# Sum COUNT within each MODEL and show the resulting table
model_counts <- aggregate(COUNT ~ MODEL, data = citroen, FUN = sum)
print(model_counts)
##                MODEL COUNT
## 1                2CV    28
## 2                5HP     4
## 3              AMI 6     4
## 4           B2 CADDY     1
## 5  C6 FAUX CABRIOLET     3
## 6                 CX    21
## 7                 DS   207
## 8              DYANE    17
## 9                 GS    51
## 10                GT     1
## 11           GX X 50     2
## 12             H VAN     8
## 13              ID19    44
## 14            MEHARI     3
## 15                SM    15
## 16   TRACTION 15 SIX   283
## 17            TYPE H     1