NSERC PDF publication records
Methods
I downloaded the publicly available names of researchers who received NSERC Postdoctoral Fellowships from 2020 to 2024. I manually looked each of them up on Google Scholar and used the “scholar” R package to download a list of their publications. I then kept only the papers that were published during or before the year in which they received the fellowship, and counted the number of publications per researcher.
Using the order of the author names, I also counted how many first-author publications each researcher had. Note that I did not check whether they were co-first author on any publications.
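Finally, I used the “sjrdata” package (2023 journal rankings) to check how many of those first-author publications were in Q1 journals. Note that publications whose journal name did not match a journal in the 2023 SJR list were dropped from all of the counts.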
Results
- There are fewer NSERC PDFs in Plant and Tree Biology (Plant Biol.) than in Evolution and Ecology (Evol. Ecol.)
- The middle 50% (interquartile range) of Evol. Ecol. NSERC postdocs had 8–17 publications (median = 11), of which 4–7 were first-author papers (median = 6), of which 3–7 were in Q1 journals (median = 5).
- The middle 50% (interquartile range) of Plant Biol. NSERC postdocs had 4.5–12 publications (median = 8), of which 2.5–5 were first-author papers (median = 5), of which 2–3.5 were in Q1 journals (median = 3).
Notes
- I did not account for the type of paper.
- For other caveats to this type of data, see this post from Dynamic Ecology.
# devtools::install_github("ikashnitsky/sjrdata")
library(dplyr)
library(ggplot2)
library(sjrdata)
here::i_am("postdocs.R")

# SJR journal table restricted to the 2023 edition.
# `title` is replaced by a lower-cased `journal` column, which is the key used
# later to match publications to journals by name.
journals <- sjr_journals %>%
  filter(year == 2023) %>%
  mutate(
    journal = stringr::str_to_lower(title),
    .keep = "unused"
  )
# One row per retained publication of every NSERC PDF awardee.
#
# Steps:
#   1. Read the award list (columns used here: Name, Discipline, AwardYear,
#      ScholarID).
#   2. Shorten discipline labels and rebuild Name as "<initial> <first part>"
#      (presumably "Last, First" -> "F Last"; confirm against the spreadsheet).
#   3. Fetch each awardee's publication list from Google Scholar.
#   4. Keep publications from the award year or earlier.
#   5. Flag first-author publications by matching the first listed author's
#      second word against the awardee's Name.
#   6. Match journals (by lower-cased name) to the SJR table and keep only
#      publications whose journal was found there.
#   7. Flag first-author publications in Q1 journals.
people <- readxl::read_xlsx(here::here("NSERC PDF awards.xlsx")) %>% # downloaded from NSERC website
  # rowwise() so get_publications() is called once per row with that row's ID
  rowwise() %>%
  mutate(
    # Disciplines other than these two become NA (no default branch)
    Discipline = case_when(
      Discipline == "evolution and ecology" ~ "Evol. Ecol.",
      Discipline == "plant and tree biology" ~ "Plant Biol."
    ),
    # First letter of the part after ", " + space + the part before ", "
    Name = stringr::word(Name, 2, sep = ", ") %>%
      stringr::str_sub(1, 1) %>%
      stringr::str_c(stringr::word(Name, 1, sep = ", "), sep = " "),
    ScholarData = list(scholar::get_publications(ScholarID))
  ) %>%
  tidyr::unnest(ScholarData) %>%
  filter(year <= AwardYear) %>%
  group_by(Name) %>%
  mutate(
    # Second word of the first comma-separated author entry
    # (assumes entries look like "J Smith" -- TODO confirm), with characters
    # normalised so they can match the spelling used in Name. Every
    # occurrence is replaced, not only the first one.
    FirstAuthor = stringr::word(author, 1, sep = ",") %>%
      stringr::word(2) %>%
      stringr::str_replace_all("‐", "-") %>%
      stringr::str_replace_all("õ", "o"),
    # NOTE: FirstAuthor is used as a pattern inside Name, so this is a
    # substring match and co-first authorship is not considered.
    FirstAuthorPub = stringr::str_detect(Name, FirstAuthor)
  ) %>%
  # Rename `year` so it does not clash with the `year` column of `journals`
  # in the join below; lower-case the journal name to match the join key.
  mutate(
    year_paper = year,
    journal = stringr::str_to_lower(journal),
    .keep = "unused"
  ) %>%
  left_join(journals, by = "journal") %>%
  # Publications whose journal is not in the SJR table are dropped here, so
  # every downstream count only includes SJR-matched publications.
  filter(!is.na(sjr_best_quartile)) %>%
  mutate(
    FirstAuthorQ1Pub = sjr_best_quartile == "Q1" & FirstAuthorPub,
    .after = "FirstAuthorPub"
  ) %>%
  # Do not leak the Name grouping into later steps
  ungroup()
# Per-awardee publication counts (one row per Discipline x AwardYear x Name).
#   nPub                - retained publications
#   nPub_FirstAuthor    - publications flagged as first-author
#   nPub_nonFirstAuthor - publications flagged as not first-author
#   nPub_Q1FirstAuthor  - first-author publications in Q1 journals
# Rows with an NA flag are ignored by the flag-based counts (na.rm = TRUE).
# NOTE: awardees with no rows in `people` do not appear here at all, so they
# are not counted as zeros.
metrics <- people %>%
  group_by(Discipline, AwardYear, Name) %>%
  summarise(
    nPub = n(),
    nPub_FirstAuthor = sum(FirstAuthorPub, na.rm = TRUE),
    nPub_nonFirstAuthor = sum(!FirstAuthorPub, na.rm = TRUE),
    nPub_Q1FirstAuthor = sum(FirstAuthorQ1Pub, na.rm = TRUE),
    # Return an ungrouped table instead of one silently grouped by
    # Discipline and AwardYear
    .groups = "drop"
  )
# Median and first/third quartiles of each count, per discipline.
# Output columns are named <metric>_median, <metric>_q1 and <metric>_q3.
summary <- metrics %>%
  group_by(Discipline) %>%
  summarise(across(
    c(nPub, nPub_FirstAuthor, nPub_Q1FirstAuthor),
    list(
      median = median,
      # `probs` spelled out in full rather than relying on partial matching
      q1 = ~ quantile(.x, probs = 0.25),
      q3 = ~ quantile(.x, probs = 0.75)
    )
  ))
summary
# Box plot with overlaid individual points of one per-awardee count, split by
# discipline.
#
# data    - data frame with a `Discipline` column and the column in `y_var`
# y_var   - unquoted name of the count column to plot
# y_label - y-axis title
# y_max   - upper end of the displayed y range (breaks every 5 from 0)
#
# The visible range is set with coord_cartesian() rather than scale limits:
# scale limits discard observations outside the range *before* the box-plot
# statistics are computed, which would distort the boxes if any awardee
# exceeds `y_max`. coord_cartesian() only zooms the view.
plot_metric_by_discipline <- function(data, y_var, y_label, y_max) {
  ggplot(data, aes(x = Discipline, y = {{ y_var }})) +
    geom_boxplot() +
    ggbeeswarm::geom_quasirandom() +
    scale_y_continuous(breaks = seq(0, y_max, 5)) +
    coord_cartesian(ylim = c(0, y_max)) +
    ylab(y_label) +
    cowplot::theme_cowplot()
}

plot_pub <- plot_metric_by_discipline(
  metrics, nPub,
  y_label = "# of publications",
  y_max = 25
)
plot_pub

plot_firstauthorpub <- plot_metric_by_discipline(
  metrics, nPub_FirstAuthor,
  y_label = "# of first author publications",
  y_max = 15
)
plot_firstauthorpub

plot_firstauthorq1pub <- plot_metric_by_discipline(
  metrics, nPub_Q1FirstAuthor,
  y_label = "# of first author pubs. in Q1 journals",
  y_max = 15
)
plot_firstauthorq1pub
# Lay the three panels out in a single row (aligned horizontally and
# vertically) and write the combined figure to metrics.pdf (9 x 4).
ggsave(
  filename = here::here("metrics.pdf"),
  plot = cowplot::plot_grid(
    plot_pub,
    plot_firstauthorpub,
    plot_firstauthorq1pub,
    align = "hv",
    nrow = 1
  ),
  width = 9,
  height = 4
)
{% endhighlight %}