Arthur Leung

NSERC PDF publication records

Methods

I downloaded the publicly available names of researchers who received NSERC Postdoctoral Fellowships (PDFs) between 2020 and 2024. I looked each awardee up manually on Google Scholar and used the “scholar” R package to download a list of their publications. I then kept only the papers published during or before the year in which they received the fellowship, and counted the number of publications per awardee.

Additionally, using the order of the author names, I counted how many first-author publications each awardee had. Note that I did not check whether they were co-first author on any publications.

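Finally, I used the “sjrdata” package (2023 SCImago Journal Rank data) to check how many of those first-author publications were in Q1 journals. Note that publications whose journal title could not be matched to the SJR list are left out of all three counts.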

Results

Boxplots for NSERC PDF awardees: number of publications, number of first-author publications, and number of first-author publications in Q1 journals.

Notes

{% highlight r %}

# devtools::install_github("ikashnitsky/sjrdata")

library(dplyr)
library(ggplot2)
library(sjrdata)

here::i_am("postdocs.R")

# Journal-quartile lookup built from the 2023 rows of `sjr_journals` (sjrdata).
# `title` is lower-cased into `journal` (and then dropped) so it can be joined
# against lower-cased Google Scholar journal names.
journals <- sjr_journals %>%
  filter(year == 2023) %>%
  mutate(journal = stringr::str_to_lower(title),
         .keep = "unused") %>%
  # More than one SJR entry may share the same lower-cased title. Keep a single
  # row per title so that joining on `journal` cannot duplicate publication
  # rows. When titles collide, the entry with the best quartile is retained
  # (Q1 first; anything that is not Q1-Q4 becomes NA and sorts last).
  arrange(journal,
          factor(sjr_best_quartile, levels = c("Q1", "Q2", "Q3", "Q4"))) %>%
  distinct(journal, .keep_all = TRUE)

# One row per (awardee, publication):
#   1. read the awardee list and shorten the discipline labels,
#   2. download each awardee's Google Scholar publications,
#   3. keep papers published in or before the award year,
#   4. flag papers whose first-author surname appears in the awardee's name,
#   5. attach the SJR quartile of the journal and flag first-author Q1 papers.
# Publications whose journal has no match in `journals` are dropped (step 5),
# so every downstream count only covers SJR-matched journals.
people <- readxl::read_xlsx(here::here("NSERC PDF awards.xlsx")) %>% # downloaded from NSERC website
  rowwise() %>% # evaluate get_publications() separately for every awardee row
  mutate(
    # Any discipline other than these two becomes NA (no default branch).
    Discipline = case_when(Discipline == "evolution and ecology" ~ "Evol. Ecol.",
                           Discipline == "plant and tree biology" ~ "Plant Biol."),
    # "Last, First" -> "F Last"
    Name = stringr::word(Name, 2, sep = ", ") %>%
      stringr::str_sub(1, 1) %>%
      stringr::str_c(stringr::word(Name, 1, sep = ", "), sep = " "),
    ScholarData = list(scholar::get_publications(ScholarID))
  ) %>%
  ungroup() %>% # leave row-wise mode; everything below is vectorised
  tidyr::unnest(ScholarData) %>%
  filter(year <= AwardYear) %>%
  mutate(
    # Second word of the first entry in the comma-separated author string,
    # with every "‐" and "õ" normalised to plain ASCII before matching.
    FirstAuthor = stringr::word(author, 1, sep = ",") %>%
      stringr::word(2) %>%
      stringr::str_replace_all("‐", "-") %>%
      stringr::str_replace_all("õ", "o"),
    # Literal (non-regex) match so characters such as "." or "(" in a surname
    # are not interpreted as regular-expression syntax.
    FirstAuthorPub = stringr::str_detect(Name, stringr::fixed(FirstAuthor))
  ) %>%
  mutate(year_paper = year,
         journal = stringr::str_to_lower(journal),
         .keep = "unused") %>%
  left_join(journals, by = "journal") %>%
  filter(!is.na(sjr_best_quartile)) %>%
  mutate(FirstAuthorQ1Pub = sjr_best_quartile == "Q1" & FirstAuthorPub,
         .after = "FirstAuthorPub")

# Per-awardee publication counts (one row per Discipline / AwardYear / Name):
#   nPub                - rows present in `people` for that awardee
#   nPub_FirstAuthor    - rows flagged as first-author publications
#   nPub_nonFirstAuthor - rows flagged as not first-author
#   nPub_Q1FirstAuthor  - first-author rows in a Q1 journal
# Rows whose flag is NA are left out of the three flag-based counts.
metrics <- people %>%
  group_by(Discipline, AwardYear, Name) %>%
  summarise(nPub = n(),
            nPub_FirstAuthor = sum(FirstAuthorPub, na.rm = TRUE),
            nPub_nonFirstAuthor = sum(!FirstAuthorPub, na.rm = TRUE),
            nPub_Q1FirstAuthor = sum(FirstAuthorQ1Pub, na.rm = TRUE),
            # Return an ungrouped tibble instead of one still grouped by
            # Discipline and AwardYear (also silences the regrouping message).
            .groups = "drop")

# Median and quartiles (25% / 75%) of each count, per discipline.
# Result columns are named <count>_median, <count>_q1 and <count>_q3.
summary <- metrics %>%
  group_by(Discipline) %>%
  summarise(across(c(nPub, nPub_FirstAuthor, nPub_Q1FirstAuthor),
                   list(median = median,
                        # `probs` is spelled out in full (no partial argument
                        # matching); names = FALSE drops the "25%"/"75%" label.
                        q1 = ~quantile(., probs = 0.25, names = FALSE),
                        q3 = ~quantile(., probs = 0.75, names = FALSE))))
summary

# Draw one panel: a boxplot per discipline with one point per awardee on top.
#   data    - data frame with a `Discipline` column (here: `metrics`)
#   y_var   - unquoted column of `data` shown on the y axis
#   y_label - y-axis title
#   y_max   - upper end of the displayed y range (the lower end is 0)
# Returns the ggplot object.
plot_metric <- function(data, y_var, y_label, y_max) {
  ggplot(data, aes(x = Discipline, y = {{ y_var }})) +
    # The beeswarm layer already draws every awardee, so the boxplot's own
    # outlier points are hidden to avoid plotting those awardees twice.
    geom_boxplot(outlier.shape = NA) +
    ggbeeswarm::geom_quasirandom() +
    scale_y_continuous(breaks = seq(0, y_max, 5)) +
    # Zoom with the coordinate system rather than scale limits: scale limits
    # discard out-of-range values before the box statistics are computed.
    # Points above y_max fall outside the visible panel.
    coord_cartesian(ylim = c(0, y_max)) +
    ylab(y_label) +
    cowplot::theme_cowplot()
}

plot_pub <- plot_metric(metrics, nPub, "# of publications", 25)
plot_pub

plot_firstauthorpub <- plot_metric(metrics, nPub_FirstAuthor,
                                   "# of first author publications", 15)
plot_firstauthorpub

plot_firstauthorq1pub <- plot_metric(metrics, nPub_Q1FirstAuthor,
                                     "# of first author pubs. in Q1 journals", 15)
plot_firstauthorq1pub

# Arrange the three panels side by side in a single row and write the
# combined figure to metrics.pdf (9 x 4) in the project root.
cowplot::plot_grid(
  plotlist = list(plot_pub, plot_firstauthorpub, plot_firstauthorq1pub),
  nrow = 1,
  align = "hv"
) %>%
  ggsave(filename = here::here("metrics.pdf"),
         plot = .,
         width = 9,
         height = 4)



{% endhighlight %}

#Career #Code #Science