-
Notifications
You must be signed in to change notification settings - Fork 0
/
h_index_calculation.py
37 lines (27 loc) · 1.29 KB
/
h_index_calculation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
# CSV file read by the script; the DOI, conference, volume, citations and
# year columns are selected from it.
INPUT_FILE_PATH = "raw_data.csv"
# CSV file the aggregated per-(conference, volume, year) table is written to.
OUTPUT_FILE_PATH = "conferences_h_indices.csv"
def calculate_h_index(citations):
    """Return the h-index of a collection of citation counts.

    The h-index is the largest ``h`` such that at least ``h`` of the
    papers have ``h`` or more citations each.

    Args:
        citations: Iterable of per-paper citation counts. The input is
            left unmodified; a sorted copy is used internally.

    Returns:
        The h-index as an ``int``; ``0`` when ``citations`` is empty.
    """
    # sorted() works on a copy, so the caller's sequence keeps its order.
    ranked = sorted(citations, reverse=True)

    h_index = 0
    for rank, count in enumerate(ranked, start=1):
        # In descending order the paper at 1-based position ``rank`` is the
        # weakest of the top ``rank`` papers, so checking it alone decides
        # whether all of them have at least ``rank`` citations.
        if count < rank:
            break
        h_index = rank
    return h_index
# Script body: build a per-(conference, volume, year) citation summary and
# write it to OUTPUT_FILE_PATH.

# Load the raw records, keeping only the columns used below.
df = pd.read_csv(INPUT_FILE_PATH)[["DOI", "conference", "volume", "citations", "year"]]

# Drop rows whose DOI equals -1 (presumably a "no DOI" marker -- confirm
# against the data source); the DOI column itself is not needed afterwards.
filtered_df = df[df["DOI"] != -1][["conference", "volume", "citations", "year"]]

# One group per (conference, volume, year); as_index=False keeps the group
# keys as ordinary columns in the aggregated result.
grouping = filtered_df.groupby(["conference", "volume", "year"], as_index=False)

# Compute every per-group statistic in one named aggregation so the output
# columns come from the same pass instead of being stitched together from
# three separate aggregations by index alignment. The dict is unpacked
# because "h-index" is not a valid keyword identifier.
h_index_df = grouping.agg(
    **{
        "h-index": pd.NamedAgg(
            column="citations",
            aggfunc=lambda citations: calculate_h_index(list(citations.values)),
        ),
        "number_of_papers": pd.NamedAgg(column="citations", aggfunc="count"),
        "citations_per_paper_average": pd.NamedAgg(column="citations", aggfunc="mean"),
    }
)

# Label each row by whether its volume name mentions "finding"
# (case-insensitive). str() guards against a volume column that pandas
# parsed as numeric, which would otherwise have no .lower().
h_index_df["type"] = h_index_df["volume"].apply(
    lambda volume: "2-Findings" if "finding" in str(volume).lower() else "1-Main"
)

h_index_df.to_csv(OUTPUT_FILE_PATH, index=False)
# Report the path actually written rather than a hard-coded copy of it.
print(f"H-index calculated and saved into {OUTPUT_FILE_PATH}")