-
Notifications
You must be signed in to change notification settings - Fork 0
/
OpenAlexEdges.R
101 lines (77 loc) · 2.97 KB
/
OpenAlexEdges.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#' Build an author -> publication edge list from an OpenAlex title search
#'
#' Queries OpenAlex for works whose title matches `keywords`, that were
#' published between `pub_start_date` and `pub_end_date`, and that have more
#' than 50 citations. Every (author, work) pair becomes one edge, and both
#' endpoints are then replaced by the ids found in the node table returned by
#' `authorPubNodes()`.
#'
#' Side effects: attaches `openalexR` and `tidyverse`, sets the
#' `openalexR.mailto` option, and sources `openAlexNodes.R` from the current
#' working directory.
#'
#' @param keywords Search term(s) passed to `oa_fetch()` as `title.search`.
#' @param pub_start_date Lower bound passed as `from_publication_date`.
#' @param pub_end_date Upper bound passed as `to_publication_date`.
#' @return A data frame with columns `Source`, `Target`, `Type`
#'   (always `"directed"`) and `Weight` (always `1`). `Source` and `Target`
#'   hold values from `author_nodes$Id`; an endpoint with no matching
#'   `author_nodes$Node` entry is `NA`. The data frame has zero rows when the
#'   search yields no (author, work) pairs.
authorPubEdges <- function(keywords, pub_start_date, pub_end_date) {
  # create search engine function
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))
    # NOTE(review): this address looks like a scrubbed placeholder; confirm
    # a real contact address is configured before relying on it.
    options(openalexR.mailto = "[email protected]")
    # search engine
    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # replace edges with id from nodes data set
  replace_edges_with_ids <- function(author_edges, author_nodes) {
    # Lookup table from node value to its Id; a value that appears more than
    # once in `Node` resolves to its first Id.
    node_lookup <- setNames(author_nodes$Id, author_nodes$Node)
    # unname() keeps the lookup's names from leaking into the edge columns.
    author_edges$Source <- unname(node_lookup[author_edges$Source])
    author_edges$Target <- unname(node_lookup[author_edges$Target])
    author_edges
  }

  # import nodes function
  source("openAlexNodes.R")
  # run author nodes function
  author_nodes <- authorPubNodes(keywords, pub_start_date, pub_end_date)
  # run search engine
  search_data <- search_engine(keywords, pub_start_date, pub_end_date)

  # NROW() is 0 for NULL or an empty result, so an empty search produces an
  # empty edge list instead of failing inside a `1:0` loop.
  n_works <- NROW(search_data)

  # One character vector of author names per work. The names are read from
  # the second column of each per-work author table (assumed to be the
  # display-name column -- TODO confirm against the installed openalexR).
  # A work whose author entry is not a data frame contributes no edges.
  author_names <- lapply(seq_len(n_works), function(i) {
    work_authors <- search_data$author[[i]]
    if (!is.data.frame(work_authors) || ncol(work_authors) < 2L) {
      return(character(0))
    }
    as.character(work_authors[[2]])
  })
  authors_per_work <- lengths(author_names)

  # grab all authors
  all_authors <- unlist(author_names, use.names = FALSE)
  if (is.null(all_authors)) {
    all_authors <- character(0)
  }

  # repeat each publication title once per author so both vectors line up
  if (n_works > 0L) {
    all_publications <- rep(search_data$display_name, times = authors_per_work)
  } else {
    all_publications <- character(0)
  }

  # create edges data frame; the constant columns are expanded explicitly
  # because data.frame() cannot recycle a scalar to zero rows
  n_edges <- length(all_authors)
  author_publication_edges <- data.frame(
    Source = all_authors,
    Target = all_publications,
    Type = rep("directed", n_edges),
    Weight = rep(1.0, n_edges)
  )

  replace_edges_with_ids(author_publication_edges, author_nodes)
}