9. Research articles vs datasets in chemistry and materials science#

In this notebook, we show how to retrieve metadata about papers and datasets in order to compare the number of research articles published in chemistry and materials science (Web of Science Core Collection) with the number of datasets deposited in data repositories (DataCite and ZENODO) from 1996 to 2023.

9.1. Example of data collection using the Web of Science API#

import os
import pandas as pd
import requests
import urllib.parse
import json
import time
import matextract  # noqa: F401

9.1.1. Create a function to request data from Web of Science#

def wos_api_request(i, query, wos_data, retry_delay=100, max_retries=None):
    """Fetch one page of Web of Science results and append its records.

    Requests page ``i + 1`` (50 records per page) of the topic search
    ``TS=(query)`` from the WoS Starter API and extends ``wos_data`` in place
    with the records found under the ``"hits"`` key of the JSON response.

    If the request fails to connect, the body is not valid JSON, or the
    response has no ``"hits"`` key, a message is printed and the same page is
    requested again after ``retry_delay`` seconds.

    Args:
        i: Zero-based page index; the API page requested is ``i + 1``.
        query: Topic-search expression; it is URL-quoted before being sent.
        wos_data: List that receives the records (mutated in place).
        retry_delay: Seconds to wait before resending a failed request.
        max_retries: Maximum number of resends; ``None`` retries indefinitely.

    Raises:
        KeyError: If the ``WOS_API_KEY`` environment variable is not set, or
            (when ``max_retries`` is exhausted) if the last response had no
            ``"hits"`` key.
        requests.exceptions.ConnectionError, requests.exceptions.JSONDecodeError:
            Re-raised from the last attempt when ``max_retries`` is exhausted.
    """
    url = (
        "https://api.clarivate.com/apis/wos-starter/v1/documents"
        f"?db=WOS&q=TS=({urllib.parse.quote(query)})"
        f"&limit=50&page={i + 1}"
    )
    # Read the key outside the retry loop: a missing environment variable is a
    # configuration error and must not be mistaken for a retryable KeyError.
    headers = {"X-APIKey": os.environ["WOS_API_KEY"]}

    retries = 0
    while True:
        try:
            # The request itself is inside the try so that connection errors
            # are actually handled by the retry logic.
            response = requests.get(url, headers=headers)
            hits = response.json()["hits"]
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.JSONDecodeError,
            KeyError,
        ):
            if max_retries is not None and retries >= max_retries:
                raise
            retries += 1
            print(f"Resending WoS API request #{i+1}")
            time.sleep(retry_delay)
        else:
            wos_data.extend(hits)
            return

9.1.2. Create a function to count Article and Data Paper documents per publication year between 1996 and 2023#

def process_data(wos_data, first_year=1996, last_year=2023):
    """Count Article / Data Paper records per publication year.

    Args:
        wos_data: Iterable of WoS record dicts. Each record must have a
            ``"types"`` entry; records of a matching type must also have
            ``["source"]["publishYear"]`` (anything ``int()`` accepts).
        first_year: First publication year to include (inclusive).
        last_year: Last publication year to include (inclusive).

    Returns:
        A list of ``{"publication_year": int, "wos_documents": int}`` dicts,
        one per year that has at least one matching record, in the order in
        which each year is first encountered in ``wos_data``.
    """
    # dicts preserve insertion order, so the result keeps first-seen order.
    counts = {}
    for paper in wos_data:
        types = paper["types"]
        if "Article" not in types and "Data Paper" not in types:
            continue
        pub_year = int(paper["source"]["publishYear"])
        if first_year <= pub_year <= last_year:
            counts[pub_year] = counts.get(pub_year, 0) + 1

    return [
        {"publication_year": pub_year, "wos_documents": total}
        for pub_year, total in counts.items()
    ]

9.1.3. Create a function to save the results in a .csv file#

def save_results(file_name, years):
    """Write per-year document counts to ``./wos_results/<file_name>.csv``.

    The file is ``;``-separated, has the columns ``publication_year`` and
    ``wos_documents``, and lists the most recent year first. The output
    directory is created if it does not exist, and an empty ``years`` list
    still produces a file containing the header row.

    Args:
        file_name: Base name of the output file (without ``.csv``).
        years: List of ``{"publication_year": ..., "wos_documents": ...}``
            dicts; it is not modified.
    """
    # sorted() leaves the caller's list untouched.
    rows = sorted(years, key=lambda row: row["publication_year"], reverse=True)

    # Explicit columns keep the header present even when there are no rows.
    df = pd.DataFrame(data=rows, columns=["publication_year", "wos_documents"])

    os.makedirs("./wos_results", exist_ok=True)
    df.to_csv(f"./wos_results/{file_name}.csv", sep=";", index=False)

9.1.4. Example of data collection on “thermoelectric materials”#

As an example, we extract from the Web of Science Core Collection the papers published on thermoelectric materials.

# Accumulator for the raw WoS records; wos_api_request appends to it in place.
wos_data = []
# Topic-search terms; they are URL-quoted into the TS=(...) clause of each request.
query = "thermoelectric AND materials"

We send an initial WoS API request to assess the number of requests required to extract all the papers on this topic. From the initial response, we extract the total number of records found.

# Probe request (page 1): only the total record count in its metadata is used.
initial_wos_response = requests.get(
    "https://api.clarivate.com/apis/wos-starter/v1/documents"
    f"?db=WOS&q=TS=({urllib.parse.quote(query)})"
    "&limit=50&page=1",
    headers={"X-APIKey": os.environ["WOS_API_KEY"]},
)
# Surface HTTP errors (bad key, quota, ...) here rather than as an opaque
# KeyError on 'metadata' below.
initial_wos_response.raise_for_status()
data = initial_wos_response.json()
print(f"Number of papers from the query ({query}): {data['metadata']['total']}")
Number of papers from the query (thermoelectric AND materials): 29704

With the total number of records, we calculate the number of requests required.

# Ceiling division by the page size (50): a partially filled last page still
# needs its own request, and a total of 0 yields 0 requests.
wos_requests_required = ((data["metadata"]["total"] - 1) // 50) + 1
print(f"Number of requests required: {wos_requests_required}")
Number of requests required: 595

Now we can extract the papers from WoS. This can take several minutes, depending on the total number of records to be collected.

# wos_api_request(i, ...) fetches page i + 1, so iterating over
# range(wos_requests_required) covers pages 1..wos_requests_required exactly;
# one more iteration would request a page past the end of the result set.
for i in range(wos_requests_required):
    wos_api_request(i, query, wos_data)
    print(
        f"{(((i + 1) * 100) / wos_requests_required):.1f}% of WoS API requests complete"
    )
0.2% of WoS API requests complete
0.3% of WoS API requests complete
0.5% of WoS API requests complete
0.7% of WoS API requests complete
0.8% of WoS API requests complete
1.0% of WoS API requests complete
1.2% of WoS API requests complete
1.3% of WoS API requests complete
1.5% of WoS API requests complete
1.7% of WoS API requests complete
1.8% of WoS API requests complete
2.0% of WoS API requests complete
2.2% of WoS API requests complete
2.4% of WoS API requests complete
2.5% of WoS API requests complete
2.7% of WoS API requests complete
2.9% of WoS API requests complete
3.0% of WoS API requests complete
3.2% of WoS API requests complete
3.4% of WoS API requests complete
3.5% of WoS API requests complete
3.7% of WoS API requests complete
3.9% of WoS API requests complete
4.0% of WoS API requests complete
4.2% of WoS API requests complete
4.4% of WoS API requests complete
4.5% of WoS API requests complete
4.7% of WoS API requests complete
4.9% of WoS API requests complete
5.0% of WoS API requests complete
5.2% of WoS API requests complete
5.4% of WoS API requests complete
5.5% of WoS API requests complete
5.7% of WoS API requests complete
5.9% of WoS API requests complete
6.1% of WoS API requests complete
6.2% of WoS API requests complete
6.4% of WoS API requests complete
6.6% of WoS API requests complete
6.7% of WoS API requests complete
6.9% of WoS API requests complete
7.1% of WoS API requests complete
7.2% of WoS API requests complete
7.4% of WoS API requests complete
7.6% of WoS API requests complete
7.7% of WoS API requests complete
7.9% of WoS API requests complete
8.1% of WoS API requests complete
8.2% of WoS API requests complete
8.4% of WoS API requests complete
8.6% of WoS API requests complete
8.7% of WoS API requests complete
8.9% of WoS API requests complete
9.1% of WoS API requests complete
9.2% of WoS API requests complete
9.4% of WoS API requests complete
9.6% of WoS API requests complete
9.7% of WoS API requests complete
9.9% of WoS API requests complete
10.1% of WoS API requests complete
10.3% of WoS API requests complete
10.4% of WoS API requests complete
10.6% of WoS API requests complete
10.8% of WoS API requests complete
10.9% of WoS API requests complete
11.1% of WoS API requests complete
11.3% of WoS API requests complete
11.4% of WoS API requests complete
11.6% of WoS API requests complete
11.8% of WoS API requests complete
11.9% of WoS API requests complete
12.1% of WoS API requests complete
12.3% of WoS API requests complete
12.4% of WoS API requests complete
12.6% of WoS API requests complete
12.8% of WoS API requests complete
12.9% of WoS API requests complete
13.1% of WoS API requests complete
13.3% of WoS API requests complete
13.4% of WoS API requests complete
13.6% of WoS API requests complete
13.8% of WoS API requests complete
13.9% of WoS API requests complete
14.1% of WoS API requests complete
14.3% of WoS API requests complete
14.5% of WoS API requests complete
14.6% of WoS API requests complete
14.8% of WoS API requests complete
15.0% of WoS API requests complete
15.1% of WoS API requests complete
15.3% of WoS API requests complete
15.5% of WoS API requests complete
15.6% of WoS API requests complete
15.8% of WoS API requests complete
16.0% of WoS API requests complete
16.1% of WoS API requests complete
16.3% of WoS API requests complete
16.5% of WoS API requests complete
16.6% of WoS API requests complete
16.8% of WoS API requests complete
17.0% of WoS API requests complete
17.1% of WoS API requests complete
17.3% of WoS API requests complete
17.5% of WoS API requests complete
17.6% of WoS API requests complete
17.8% of WoS API requests complete
18.0% of WoS API requests complete
18.2% of WoS API requests complete
18.3% of WoS API requests complete
18.5% of WoS API requests complete
18.7% of WoS API requests complete
18.8% of WoS API requests complete
19.0% of WoS API requests complete
19.2% of WoS API requests complete
19.3% of WoS API requests complete
19.5% of WoS API requests complete
19.7% of WoS API requests complete
19.8% of WoS API requests complete
20.0% of WoS API requests complete
20.2% of WoS API requests complete
20.3% of WoS API requests complete
20.5% of WoS API requests complete
20.7% of WoS API requests complete
20.8% of WoS API requests complete
21.0% of WoS API requests complete
21.2% of WoS API requests complete
21.3% of WoS API requests complete
21.5% of WoS API requests complete
21.7% of WoS API requests complete
21.8% of WoS API requests complete
22.0% of WoS API requests complete
22.2% of WoS API requests complete
22.4% of WoS API requests complete
22.5% of WoS API requests complete
22.7% of WoS API requests complete
22.9% of WoS API requests complete
23.0% of WoS API requests complete
23.2% of WoS API requests complete
23.4% of WoS API requests complete
23.5% of WoS API requests complete
23.7% of WoS API requests complete
23.9% of WoS API requests complete
24.0% of WoS API requests complete
24.2% of WoS API requests complete
24.4% of WoS API requests complete
24.5% of WoS API requests complete
24.7% of WoS API requests complete
24.9% of WoS API requests complete
25.0% of WoS API requests complete
25.2% of WoS API requests complete
25.4% of WoS API requests complete
25.5% of WoS API requests complete
25.7% of WoS API requests complete
25.9% of WoS API requests complete
26.1% of WoS API requests complete
26.2% of WoS API requests complete
26.4% of WoS API requests complete
26.6% of WoS API requests complete
26.7% of WoS API requests complete
26.9% of WoS API requests complete
27.1% of WoS API requests complete
27.2% of WoS API requests complete
27.4% of WoS API requests complete
27.6% of WoS API requests complete
27.7% of WoS API requests complete
27.9% of WoS API requests complete
28.1% of WoS API requests complete
28.2% of WoS API requests complete
28.4% of WoS API requests complete
28.6% of WoS API requests complete
28.7% of WoS API requests complete
28.9% of WoS API requests complete
29.1% of WoS API requests complete
29.2% of WoS API requests complete
29.4% of WoS API requests complete
29.6% of WoS API requests complete
29.7% of WoS API requests complete
29.9% of WoS API requests complete
30.1% of WoS API requests complete
30.3% of WoS API requests complete
30.4% of WoS API requests complete
30.6% of WoS API requests complete
30.8% of WoS API requests complete
30.9% of WoS API requests complete
31.1% of WoS API requests complete
31.3% of WoS API requests complete
31.4% of WoS API requests complete
31.6% of WoS API requests complete
31.8% of WoS API requests complete
31.9% of WoS API requests complete
32.1% of WoS API requests complete
32.3% of WoS API requests complete
32.4% of WoS API requests complete
32.6% of WoS API requests complete
32.8% of WoS API requests complete
32.9% of WoS API requests complete
33.1% of WoS API requests complete
33.3% of WoS API requests complete
33.4% of WoS API requests complete
33.6% of WoS API requests complete
33.8% of WoS API requests complete
33.9% of WoS API requests complete
34.1% of WoS API requests complete
34.3% of WoS API requests complete
34.5% of WoS API requests complete
34.6% of WoS API requests complete
34.8% of WoS API requests complete
35.0% of WoS API requests complete
35.1% of WoS API requests complete
35.3% of WoS API requests complete
35.5% of WoS API requests complete
35.6% of WoS API requests complete
35.8% of WoS API requests complete
36.0% of WoS API requests complete
36.1% of WoS API requests complete
36.3% of WoS API requests complete
36.5% of WoS API requests complete
36.6% of WoS API requests complete
36.8% of WoS API requests complete
37.0% of WoS API requests complete
37.1% of WoS API requests complete
37.3% of WoS API requests complete
37.5% of WoS API requests complete
37.6% of WoS API requests complete
37.8% of WoS API requests complete
38.0% of WoS API requests complete
38.2% of WoS API requests complete
38.3% of WoS API requests complete
38.5% of WoS API requests complete
38.7% of WoS API requests complete
38.8% of WoS API requests complete
39.0% of WoS API requests complete
39.2% of WoS API requests complete
39.3% of WoS API requests complete
39.5% of WoS API requests complete
39.7% of WoS API requests complete
39.8% of WoS API requests complete
40.0% of WoS API requests complete
40.2% of WoS API requests complete
40.3% of WoS API requests complete
40.5% of WoS API requests complete
40.7% of WoS API requests complete
40.8% of WoS API requests complete
41.0% of WoS API requests complete
41.2% of WoS API requests complete
41.3% of WoS API requests complete
41.5% of WoS API requests complete
41.7% of WoS API requests complete
41.8% of WoS API requests complete
42.0% of WoS API requests complete
42.2% of WoS API requests complete
42.4% of WoS API requests complete
42.5% of WoS API requests complete
42.7% of WoS API requests complete
42.9% of WoS API requests complete
43.0% of WoS API requests complete
43.2% of WoS API requests complete
43.4% of WoS API requests complete
43.5% of WoS API requests complete
43.7% of WoS API requests complete
43.9% of WoS API requests complete
44.0% of WoS API requests complete
44.2% of WoS API requests complete
44.4% of WoS API requests complete
44.5% of WoS API requests complete
44.7% of WoS API requests complete
44.9% of WoS API requests complete
45.0% of WoS API requests complete
45.2% of WoS API requests complete
45.4% of WoS API requests complete
45.5% of WoS API requests complete
45.7% of WoS API requests complete
45.9% of WoS API requests complete
46.1% of WoS API requests complete
46.2% of WoS API requests complete
46.4% of WoS API requests complete
46.6% of WoS API requests complete
46.7% of WoS API requests complete
46.9% of WoS API requests complete
47.1% of WoS API requests complete
47.2% of WoS API requests complete
47.4% of WoS API requests complete
47.6% of WoS API requests complete
47.7% of WoS API requests complete
47.9% of WoS API requests complete
48.1% of WoS API requests complete
48.2% of WoS API requests complete
48.4% of WoS API requests complete
48.6% of WoS API requests complete
48.7% of WoS API requests complete
48.9% of WoS API requests complete
49.1% of WoS API requests complete
49.2% of WoS API requests complete
49.4% of WoS API requests complete
49.6% of WoS API requests complete
49.7% of WoS API requests complete
49.9% of WoS API requests complete
50.1% of WoS API requests complete
50.3% of WoS API requests complete
50.4% of WoS API requests complete
50.6% of WoS API requests complete
50.8% of WoS API requests complete
50.9% of WoS API requests complete
51.1% of WoS API requests complete
51.3% of WoS API requests complete
51.4% of WoS API requests complete
51.6% of WoS API requests complete
51.8% of WoS API requests complete
51.9% of WoS API requests complete
52.1% of WoS API requests complete
52.3% of WoS API requests complete
52.4% of WoS API requests complete
52.6% of WoS API requests complete
52.8% of WoS API requests complete
52.9% of WoS API requests complete
53.1% of WoS API requests complete
53.3% of WoS API requests complete
53.4% of WoS API requests complete
53.6% of WoS API requests complete
53.8% of WoS API requests complete
53.9% of WoS API requests complete
54.1% of WoS API requests complete
54.3% of WoS API requests complete
54.5% of WoS API requests complete
54.6% of WoS API requests complete
54.8% of WoS API requests complete
55.0% of WoS API requests complete
55.1% of WoS API requests complete
55.3% of WoS API requests complete
55.5% of WoS API requests complete
55.6% of WoS API requests complete
55.8% of WoS API requests complete
56.0% of WoS API requests complete
56.1% of WoS API requests complete
56.3% of WoS API requests complete
56.5% of WoS API requests complete
56.6% of WoS API requests complete
56.8% of WoS API requests complete
57.0% of WoS API requests complete
57.1% of WoS API requests complete
57.3% of WoS API requests complete
57.5% of WoS API requests complete
57.6% of WoS API requests complete
57.8% of WoS API requests complete
58.0% of WoS API requests complete
58.2% of WoS API requests complete
58.3% of WoS API requests complete
58.5% of WoS API requests complete
58.7% of WoS API requests complete
58.8% of WoS API requests complete
59.0% of WoS API requests complete
59.2% of WoS API requests complete
59.3% of WoS API requests complete
59.5% of WoS API requests complete
59.7% of WoS API requests complete
59.8% of WoS API requests complete
60.0% of WoS API requests complete
60.2% of WoS API requests complete
60.3% of WoS API requests complete
60.5% of WoS API requests complete
60.7% of WoS API requests complete
60.8% of WoS API requests complete
61.0% of WoS API requests complete
61.2% of WoS API requests complete
61.3% of WoS API requests complete
61.5% of WoS API requests complete
61.7% of WoS API requests complete
61.8% of WoS API requests complete
62.0% of WoS API requests complete
62.2% of WoS API requests complete
62.4% of WoS API requests complete
62.5% of WoS API requests complete
62.7% of WoS API requests complete
62.9% of WoS API requests complete
63.0% of WoS API requests complete
63.2% of WoS API requests complete
63.4% of WoS API requests complete
63.5% of WoS API requests complete
63.7% of WoS API requests complete
63.9% of WoS API requests complete
64.0% of WoS API requests complete
64.2% of WoS API requests complete
64.4% of WoS API requests complete
64.5% of WoS API requests complete
64.7% of WoS API requests complete
64.9% of WoS API requests complete
65.0% of WoS API requests complete
65.2% of WoS API requests complete
65.4% of WoS API requests complete
65.5% of WoS API requests complete
65.7% of WoS API requests complete
65.9% of WoS API requests complete
66.1% of WoS API requests complete
66.2% of WoS API requests complete
66.4% of WoS API requests complete
66.6% of WoS API requests complete
66.7% of WoS API requests complete
66.9% of WoS API requests complete
67.1% of WoS API requests complete
67.2% of WoS API requests complete
67.4% of WoS API requests complete
67.6% of WoS API requests complete
67.7% of WoS API requests complete
67.9% of WoS API requests complete
68.1% of WoS API requests complete
68.2% of WoS API requests complete
68.4% of WoS API requests complete
68.6% of WoS API requests complete
68.7% of WoS API requests complete
68.9% of WoS API requests complete
69.1% of WoS API requests complete
69.2% of WoS API requests complete
69.4% of WoS API requests complete
69.6% of WoS API requests complete
69.7% of WoS API requests complete
69.9% of WoS API requests complete
70.1% of WoS API requests complete
70.3% of WoS API requests complete
70.4% of WoS API requests complete
70.6% of WoS API requests complete
70.8% of WoS API requests complete
70.9% of WoS API requests complete
71.1% of WoS API requests complete
71.3% of WoS API requests complete
71.4% of WoS API requests complete
71.6% of WoS API requests complete
71.8% of WoS API requests complete
71.9% of WoS API requests complete
72.1% of WoS API requests complete
72.3% of WoS API requests complete
72.4% of WoS API requests complete
72.6% of WoS API requests complete
72.8% of WoS API requests complete
72.9% of WoS API requests complete
73.1% of WoS API requests complete
73.3% of WoS API requests complete
73.4% of WoS API requests complete
73.6% of WoS API requests complete
73.8% of WoS API requests complete
73.9% of WoS API requests complete
74.1% of WoS API requests complete
74.3% of WoS API requests complete
74.5% of WoS API requests complete
74.6% of WoS API requests complete
74.8% of WoS API requests complete
75.0% of WoS API requests complete
75.1% of WoS API requests complete
75.3% of WoS API requests complete
75.5% of WoS API requests complete
75.6% of WoS API requests complete
75.8% of WoS API requests complete
76.0% of WoS API requests complete
76.1% of WoS API requests complete
76.3% of WoS API requests complete
76.5% of WoS API requests complete
76.6% of WoS API requests complete
76.8% of WoS API requests complete
77.0% of WoS API requests complete
77.1% of WoS API requests complete
77.3% of WoS API requests complete
77.5% of WoS API requests complete
77.6% of WoS API requests complete
77.8% of WoS API requests complete
78.0% of WoS API requests complete
78.2% of WoS API requests complete
78.3% of WoS API requests complete
78.5% of WoS API requests complete
78.7% of WoS API requests complete
78.8% of WoS API requests complete
79.0% of WoS API requests complete
79.2% of WoS API requests complete
79.3% of WoS API requests complete
79.5% of WoS API requests complete
79.7% of WoS API requests complete
79.8% of WoS API requests complete
80.0% of WoS API requests complete
80.2% of WoS API requests complete
80.3% of WoS API requests complete
80.5% of WoS API requests complete
80.7% of WoS API requests complete
80.8% of WoS API requests complete
81.0% of WoS API requests complete
81.2% of WoS API requests complete
81.3% of WoS API requests complete
81.5% of WoS API requests complete
81.7% of WoS API requests complete
81.8% of WoS API requests complete
82.0% of WoS API requests complete
82.2% of WoS API requests complete
82.4% of WoS API requests complete
82.5% of WoS API requests complete
82.7% of WoS API requests complete
82.9% of WoS API requests complete
83.0% of WoS API requests complete
83.2% of WoS API requests complete
83.4% of WoS API requests complete
83.5% of WoS API requests complete
83.7% of WoS API requests complete
83.9% of WoS API requests complete
84.0% of WoS API requests complete
84.2% of WoS API requests complete
84.4% of WoS API requests complete
84.5% of WoS API requests complete
84.7% of WoS API requests complete
84.9% of WoS API requests complete
85.0% of WoS API requests complete
85.2% of WoS API requests complete
85.4% of WoS API requests complete
85.5% of WoS API requests complete
85.7% of WoS API requests complete
85.9% of WoS API requests complete
86.1% of WoS API requests complete
86.2% of WoS API requests complete
86.4% of WoS API requests complete
86.6% of WoS API requests complete
86.7% of WoS API requests complete
86.9% of WoS API requests complete
87.1% of WoS API requests complete
87.2% of WoS API requests complete
87.4% of WoS API requests complete
87.6% of WoS API requests complete
87.7% of WoS API requests complete
87.9% of WoS API requests complete
88.1% of WoS API requests complete
88.2% of WoS API requests complete
88.4% of WoS API requests complete
88.6% of WoS API requests complete
88.7% of WoS API requests complete
88.9% of WoS API requests complete
89.1% of WoS API requests complete
89.2% of WoS API requests complete
89.4% of WoS API requests complete
89.6% of WoS API requests complete
89.7% of WoS API requests complete
89.9% of WoS API requests complete
90.1% of WoS API requests complete
90.3% of WoS API requests complete
90.4% of WoS API requests complete
90.6% of WoS API requests complete
90.8% of WoS API requests complete
90.9% of WoS API requests complete
91.1% of WoS API requests complete
91.3% of WoS API requests complete
91.4% of WoS API requests complete
91.6% of WoS API requests complete
91.8% of WoS API requests complete
91.9% of WoS API requests complete
92.1% of WoS API requests complete
92.3% of WoS API requests complete
92.4% of WoS API requests complete
92.6% of WoS API requests complete
92.8% of WoS API requests complete
92.9% of WoS API requests complete
93.1% of WoS API requests complete
93.3% of WoS API requests complete
93.4% of WoS API requests complete
93.6% of WoS API requests complete
93.8% of WoS API requests complete
93.9% of WoS API requests complete
94.1% of WoS API requests complete
94.3% of WoS API requests complete
94.5% of WoS API requests complete
94.6% of WoS API requests complete
94.8% of WoS API requests complete
95.0% of WoS API requests complete
95.1% of WoS API requests complete
95.3% of WoS API requests complete
95.5% of WoS API requests complete
95.6% of WoS API requests complete
95.8% of WoS API requests complete
96.0% of WoS API requests complete
96.1% of WoS API requests complete
96.3% of WoS API requests complete
96.5% of WoS API requests complete
96.6% of WoS API requests complete
96.8% of WoS API requests complete
97.0% of WoS API requests complete
97.1% of WoS API requests complete
97.3% of WoS API requests complete
97.5% of WoS API requests complete
97.6% of WoS API requests complete
97.8% of WoS API requests complete
98.0% of WoS API requests complete
98.2% of WoS API requests complete
98.3% of WoS API requests complete
98.5% of WoS API requests complete
98.7% of WoS API requests complete
98.8% of WoS API requests complete
99.0% of WoS API requests complete
99.2% of WoS API requests complete
99.3% of WoS API requests complete
99.5% of WoS API requests complete
99.7% of WoS API requests complete
99.8% of WoS API requests complete
100.0% of WoS API requests complete
100.2% of WoS API requests complete

We obtain a list of dictionaries including all the records extracted (as an example, the data for the first paper is shown below). We can save these raw results in a .json file.

wos_data[0]
{'uid': 'WOS:000187876000038',
 'title': 'A critical study of the thermoelectric method of measuring vapor pressure',
 'types': ['Article'],
 'sourceTypes': ['Article'],
 'source': {'sourceTitle': 'JOURNAL OF BIOLOGICAL CHEMISTRY',
  'publishYear': 1938,
  'publishMonth': 'NOV',
  'volume': '126',
  'issue': '1',
  'pages': {'range': '349-360', 'begin': '349', 'end': '360', 'count': 12}},
 'names': {'authors': [{'displayName': 'Roepke, RR',
    'wosStandard': 'Roepke, RR',
    'researcherId': 'FXG-1059-2022'},
   {'displayName': 'Baldes, EJ',
    'wosStandard': 'Baldes, EJ',
    'researcherId': 'EKH-9490-2022'}]},
 'links': {'record': 'https://www.webofscience.com/api/gateway?GWVersion=2&SrcApp=incarcsic_woslite&SrcAuth=WosAPI&KeyUT=WOS:000187876000038&DestLinkType=FullRecord&DestApp=WOS_CPL',
  'citingArticles': 'https://www.webofscience.com/api/gateway?GWVersion=2&SrcApp=incarcsic_woslite&SrcAuth=WosAPI&KeyUT=WOS:000187876000038&DestLinkType=CitingArticles&DestApp=WOS_CPL',
  'references': 'https://www.webofscience.com/api/gateway?GWVersion=2&SrcApp=incarcsic_woslite&SrcAuth=WosAPI&KeyUT=WOS:000187876000038&DestLinkType=CitedReferences&DestApp=WOS',
  'related': 'https://www.webofscience.com/api/gateway?GWVersion=2&SrcApp=incarcsic_woslite&SrcAuth=WosAPI&KeyUT=WOS:000187876000038&DestLinkType=RelatedRecords&DestApp=WOS_CPL'},
 'citations': [{'db': 'WOS', 'count': 10}],
 'identifiers': {'eissn': '1083-351X'},
 'keywords': {'authorKeywords': []}}
# Write the raw records to a JSON file.
with open("wos_data_output.json", "w") as file:
    json.dump(wos_data, file)
# Read them back, rebinding wos_data to the list loaded from disk.
with open("wos_data_output.json") as file:
    wos_data = json.load(file)
# Notebook display: number of records loaded.
len(wos_data)
29704

We process the obtained data with the function process_data and save the results in a .csv file with the function save_results.

# Aggregate the raw records per publication year and store the counts as CSV.
years = process_data(wos_data)
save_results("thermoelectric", years)

# Total number of documents that passed the type and year filters.
papers = sum(year["wos_documents"] for year in years)
print(
    f"Number of papers published between 1996 and 2023 from the query ({query}): {papers}"
)
Number of papers published between 1996 and 2023 from the query (thermoelectric AND materials): 22867

Following this procedure, we have extracted from WoS the data on published papers (Articles and Data Papers) for the main categories of materials studied in chemistry. Using the corresponding APIs, we can also extract the number of datasets published in the DataCite and ZENODO repositories for the same categories.

9.2. Collecting the saved data for all categories from WoS, DataCite and ZENODO#

In this section, we prepare the collected data on papers and datasets to plot the results as a function of the categories and publication years.

from glob import glob
# Collect every per-category CSV. NOTE: glob does not sort its matches, so
# all_results[0] is not guaranteed to be any particular category.
all_results = glob("./wos_results/*.csv")
print(f"files found: {len(all_results)}")
files found: 9
all_results
['./wos_results/nanoparticles.csv',
 './wos_results/polymers.csv',
 './wos_results/mofs.csv',
 './wos_results/photocatalytic.csv',
 './wos_results/biomaterials.csv',
 './wos_results/thermoelectric.csv',
 './wos_results/battery_cathode.csv',
 './wos_results/2Dmaterials.csv',
 './wos_results/semiconductor.csv']

We create a .csv file combining the results of the different categories.

# Load whichever per-category file glob listed first (';'-separated).
df = pd.read_csv(all_results[0], sep=";")
df.head()
publication_year wos_documents
0 2023 76010
1 2022 79936
2 2021 73154
3 2020 75213
4 2019 70984
# Order the loaded table by year (fresh 0..n-1 index), then seed the combined
# table with an independent copy of just the year column.
df = df.sort_values("publication_year", ignore_index=True)
df_results = df[["publication_year"]].copy()
df_results.head()
publication_year
0 1996
1 1997
2 1998
3 1999
4 2000
# Category names; each one is both the CSV base name in ./wos_results and the
# column name it receives in the combined table.
materials_name = [
    "nanoparticles",
    "battery_cathode",
    "photocatalytic",
    "polymers",
    "thermoelectric",
    "mofs",
    "biomaterials",
    "2Dmaterials",
    "semiconductor",
]
for material in materials_name:
    df = pd.read_csv(f"./wos_results/{material}.csv", sep=";")
    df = df.rename(columns={"wos_documents": material})
    # Outer merge on the year: a year that is missing from one category's file
    # must not be dropped from the combined table.
    df_results = pd.merge(df_results, df, on="publication_year", how="outer")

df_results = df_results.sort_values(by="publication_year").reset_index(drop=True)
# The per-category files only list years with at least one document, so a gap
# left by the outer merge means zero documents; keep the counts as integers.
df_results[materials_name] = df_results[materials_name].fillna(0).astype("int64")
df_results.head()
publication_year nanoparticles battery_cathode photocatalytic polymers thermoelectric mofs biomaterials 2Dmaterials semiconductor
0 1996 312 49 15 12406 142 30 233 121 570
1 1997 454 56 18 12661 146 44 240 137 654
2 1998 749 97 25 13879 212 37 294 164 695
3 1999 1022 121 28 14713 226 47 347 178 721
4 2000 1298 160 27 15088 294 45 345 188 666
df_results.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   publication_year  28 non-null     int64
 1   nanoparticles     28 non-null     int64
 2   battery_cathode   28 non-null     int64
 3   photocatalytic    28 non-null     int64
 4   polymers          28 non-null     int64
 5   thermoelectric    28 non-null     int64
 6   mofs              28 non-null     int64
 7   biomaterials      28 non-null     int64
 8   2Dmaterials       28 non-null     int64
 9   semiconductor     28 non-null     int64
dtypes: int64(10)
memory usage: 2.3 KB
# Save the combined table. Note the ',' separator here, unlike the ';' used
# for the per-category files in ./wos_results.
df_results.to_csv("wos_materials_papers.csv", sep=",", index=False)

9.3. Plot the number of research papers vs. datasets deposited in data repositories in chemistry and materials science per year#

9.3.1. Load data on papers from Web of Science#

# Combined WoS paper counts: one row per year, one column per category (','-separated).
df_wos = pd.read_csv("wos_materials_papers.csv", sep=",")
df_wos.head()
publication_year nanoparticles battery_cathode photocatalytic polymers thermoelectric mofs biomaterials 2Dmaterials semiconductor
0 1996 312 49 15 12406 142 30 233 121 570
1 1997 454 56 18 12661 146 44 240 137 654
2 1998 749 97 25 13879 212 37 294 164 695
3 1999 1022 121 28 14713 226 47 347 178 721
4 2000 1298 160 27 15088 294 45 345 188 666

9.3.2. Load data on DataCite datasets#

# DataCite dataset counts per year and category (';'-separated).
# NOTE(review): this file is not written by any cell visible here — confirm its origin.
df_datacite = pd.read_csv("datacite_datasets.csv", sep=";")
df_datacite.head()
publication_year nanoparticles battery_cathode photocatalytic polymers thermoelectric mofs biomaterials 2Dmaterials semiconductor
0 2010 17 NaN NaN 11 NaN 1 1 2 NaN
1 2011 36 NaN NaN 4 1.0 1 2 4 NaN
2 2012 30 NaN NaN 41 NaN 1 2 4 NaN
3 2013 52 NaN NaN 12 1.0 1 2 139 1.0
4 2014 148 1.0 1.0 53 1.0 3 8 33 1.0

9.3.3. Load data on ZENODO datasets#

# ZENODO dataset counts per year and category (';'-separated).
# NOTE(review): this file is not written by any cell visible here — confirm its origin.
df_zenodo = pd.read_csv("zenodo_datasets.csv", sep=";")
df_zenodo.head()
publication_year nanoparticles battery_cathode photocatalytic polymers thermoelectric mofs biomaterials 2Dmaterials semiconductor
0 2010 NaN NaN NaN NaN NaN NaN 1.0 NaN NaN
1 2011 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 2012 2.0 NaN NaN 1.0 NaN NaN NaN NaN NaN
3 2013 NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 2014 NaN NaN NaN 2.0 NaN 1.0 NaN NaN NaN

9.3.4. Unify labels and colors#

# Display names for the legend. They are assigned to the plot traces by
# position, so the order must match the category columns of the data frames.
labels = [
    "nanoparticles",
    "battery cathode materials",
    "photocatalytic materials",
    "polymers",
    "thermoelectric materials",
    "MOFs",
    "biomaterials",
    "2D materials",
    "semiconductor materials",
]
# One hex colour per category, in the same order as `labels`.
colors = [
    "#1a4a81",
    "#2166ac",
    "#4393c3",
    "#92c5de",
    "#c2e1f2",
    "#f4a582",
    "#d6604d",
    "#b2182b",
    "#67001f",
]

9.3.5. Plot WoS chemistry and materials science papers vs. year using plotly.express#

import plotly.express as px
# Stacked bars of WoS paper counts per year; every column after
# "publication_year" becomes one stacked series.
fig = px.bar(
    df_wos,
    x="publication_year",
    y=df_wos.columns[1:],
    color_discrete_sequence=colors,
    barmode="relative",
)

# Replace the trace names with the human-readable labels, matched by position.
for position, bar in enumerate(fig.data):
    bar.name = labels[position]

fig.update_layout(
    plot_bgcolor="#FFFFFF",
    paper_bgcolor="#FFFFFF",
    width=900,
    height=500,
    margin={"l": 100, "r": 40, "t": 20, "b": 45},
    font={"family": "Arial", "color": "#646363", "size": 20},
    title_font_family="Arial",
    title_font_color="#646363",
    legend_title_text=None,
    # Legend is anchored by its bottom-left corner inside the plotting area.
    legend={
        "font": {"size": 18},
        "yanchor": "bottom",
        "y": 0.44,
        "xanchor": "left",
        "x": 0.04,
    },
)

fig.update_yaxes(
    title={"text": "no. WoS papers", "font": {"size": 24}},
    showgrid=True,
    gridcolor="#9D9D9C",
    range=(0, 220000),
    tickformat=".2s",
)
fig.update_xaxes(
    title_text=None,
    linecolor="#9D9D9C",
    range=(1995.5, 2023.5),
    # One tick every three years: 1996, 1999, ..., 2023.
    tickvals=list(range(1996, 2024, 3)),
)

fig

9.3.6. Plot DataCite datasets vs. year using plotly.express#

# Stacked bars of DataCite dataset counts per year; every column after
# "publication_year" becomes one stacked series.
fig = px.bar(
    df_datacite,
    x="publication_year",
    y=df_datacite.columns[1:],
    color_discrete_sequence=colors,
    barmode="relative",
)

# Replace the trace names with the human-readable labels, matched by position.
for position, bar in enumerate(fig.data):
    bar.name = labels[position]

fig.update_layout(
    plot_bgcolor="#FFFFFF",
    paper_bgcolor="#FFFFFF",
    width=900,
    height=350,
    margin={"l": 90, "r": 40, "t": 30, "b": 45},
    font={"family": "Arial", "color": "#646363", "size": 20},
    title_font_family="Arial",
    title_font_color="#646363",
    legend_title_text=None,
    # Legend is anchored by its bottom-left corner inside the plotting area.
    legend={
        "font": {"size": 18},
        "yanchor": "bottom",
        "y": 0.17,
        "xanchor": "left",
        "x": 0.04,
    },
)

fig.update_yaxes(
    title={"text": "no. DataCite datasets", "font": {"size": 24}},
    showgrid=True,
    gridcolor="#9D9D9C",
    range=(0, 2010),
    tickformat=".2s",
)
fig.update_xaxes(
    title_text=None,
    linecolor="#9D9D9C",
    # Same 1996-2023 window and ticks as the WoS plot.
    range=(1995.5, 2023.5),
    tickvals=list(range(1996, 2024, 3)),
)

fig

9.3.7. Plot ZENODO datasets vs. year using plotly.express#

# Stacked bars of ZENODO dataset counts per year; every column after
# "publication_year" becomes one stacked series.
fig = px.bar(
    df_zenodo,
    x="publication_year",
    y=df_zenodo.columns[1:],
    color_discrete_sequence=colors,
    barmode="relative",
)

# Replace the trace names with the human-readable labels, matched by position.
for position, bar in enumerate(fig.data):
    bar.name = labels[position]

fig.update_layout(
    plot_bgcolor="#FFFFFF",
    paper_bgcolor="#FFFFFF",
    width=900,
    height=350,
    margin={"l": 90, "r": 40, "t": 30, "b": 45},
    font={"family": "Arial", "color": "#646363", "size": 20},
    title_font_family="Arial",
    title_font_color="#646363",
    legend_title_text=None,
    # Legend is anchored by its bottom-left corner inside the plotting area.
    legend={
        "font": {"size": 18},
        "yanchor": "bottom",
        "y": 0.16,
        "xanchor": "left",
        "x": 0.04,
    },
)

fig.update_yaxes(
    title={"text": "no. ZENODO datasets", "font": {"size": 24}},
    showgrid=True,
    gridcolor="#9D9D9C",
    range=(0, 402),
    tickformat="000",
)
fig.update_xaxes(
    title_text=None,
    linecolor="#9D9D9C",
    # Same 1996-2023 window and ticks as the WoS plot.
    range=(1995.5, 2023.5),
    tickvals=list(range(1996, 2024, 3)),
)

fig

9.4. Plot Figure 1 with combined sub-plots using plotly.graph_objects#

from plotly.subplots import make_subplots
import plotly.graph_objects as go

9.4.1. Rearrange dataframes using pandas.melt to be able to use pandas.DataFrame.groupby#

# Wide -> long: one row per (publication_year, materials) pair, with the count
# stored in "no_papers".
df_wos_rearranged = pd.melt(
    df_wos,
    id_vars="publication_year",
    var_name="materials",
    value_name="no_papers",
)
df_wos_rearranged.head()
publication_year materials no_papers
0 1996 nanoparticles 312
1 1997 nanoparticles 454
2 1998 nanoparticles 749
3 1999 nanoparticles 1022
4 2000 nanoparticles 1298
# Wide -> long for DataCite. The value column keeps the name "no_papers" so all
# three long tables share one schema, although the values here count datasets.
df_datacite_rearranged = pd.melt(
    df_datacite,
    id_vars="publication_year",
    var_name="materials",
    value_name="no_papers",
)
df_datacite_rearranged.head()
publication_year materials no_papers
0 2010 nanoparticles 17.0
1 2011 nanoparticles 36.0
2 2012 nanoparticles 30.0
3 2013 nanoparticles 52.0
4 2014 nanoparticles 148.0
# Wide -> long for ZENODO. The value column keeps the name "no_papers" so all
# three long tables share one schema, although the values here count datasets.
df_zenodo_rearranged = pd.melt(
    df_zenodo,
    id_vars="publication_year",
    var_name="materials",
    value_name="no_papers",
)
df_zenodo_rearranged.head()
publication_year materials no_papers
0 2010 nanoparticles NaN
1 2011 nanoparticles NaN
2 2012 nanoparticles 2.0
3 2013 nanoparticles NaN
4 2014 nanoparticles NaN
# Pair each material key (in order of first appearance in the long WoS table)
# with the colour at the same position in `colors`. zip() expresses this pairing
# directly and, like the multi-iterable map() it replaces, stops at the shorter
# of the two sequences.
colors_dict = dict(zip(df_wos_rearranged["materials"].unique(), colors))
colors_dict
{'nanoparticles': '#1a4a81',
 'battery_cathode': '#2166ac',
 'photocatalytic': '#4393c3',
 'polymers': '#92c5de',
 'thermoelectric': '#c2e1f2',
 'mofs': '#f4a582',
 'biomaterials': '#d6604d',
 '2Dmaterials': '#b2182b',
 'semiconductor': '#67001f'}

9.4.2. Plot combined figure WoS - DataCite - ZENODO#

First, we set up the combined plot.

def _bars_by_material(df_long, hovertemplate=None):
    """Build one ``go.Bar`` per material from a long-format table.

    Groups are taken in order of first appearance (``sort=False``) so they line
    up with the positional ``labels`` list. Each bar is coloured through
    ``colors_dict``; ``hovertemplate`` is only passed on when it is given.
    """
    extra = {} if hovertemplate is None else {"hovertemplate": hovertemplate}
    return [
        go.Bar(
            name=group,
            x=dfg["publication_year"],
            y=dfg["no_papers"],
            marker_color=colors_dict[group],
            **extra,
        )
        for group, dfg in df_long.groupby(by="materials", sort=False)
    ]


def _figure_with_labels(traces):
    """Wrap ``traces`` in a figure and rename them by position using ``labels``."""
    figure = go.Figure(data=traces)
    for i, trace in enumerate(figure.data):
        trace.update(name=labels[i])
    return figure


# WoS bars keep the default hover text; the dataset bars show whole numbers.
data_wos = _bars_by_material(df_wos_rearranged)
fig_wos = _figure_with_labels(data_wos)

data_datacite = _bars_by_material(df_datacite_rearranged, hovertemplate="%{y:.0f}")
fig_datacite = _figure_with_labels(data_datacite)

data_zenodo = _bars_by_material(df_zenodo_rearranged, hovertemplate="%{y:.0f}")
fig_zenodo = _figure_with_labels(data_zenodo)

fig = make_subplots(rows=3, cols=1, row_heights=[0.7, 0.3, 0.3], vertical_spacing=0.1)

# Copy the traces of each single-source figure into its own subplot row.
# add_trace(row=, col=) replaces the deprecated append_trace().
for row, source_fig in enumerate((fig_wos, fig_datacite, fig_zenodo), start=1):
    for t in source_fig.data:
        fig.add_trace(t, row=row, col=1)

Then, we make cosmetic edits to the plot.

# Global look of the combined figure: white background, stacked bars, and a
# single legend anchored by its bottom-left corner.
fig.update_layout(
    plot_bgcolor="#FFFFFF",
    paper_bgcolor="#FFFFFF",
    barmode="stack",
    width=900,
    height=850,
    margin={"l": 100, "r": 40, "t": 20, "b": 45},
    font={"family": "Arial", "color": "#646363", "size": 18.9},
    title_font_family="Arial",
    title_font_color="#646363",
    legend_title_text=None,
    legend={
        "traceorder": "normal",
        "font": {"size": 18},
        "yanchor": "bottom",
        "y": 0.68,
        "xanchor": "left",
        "x": 0.04,
    },
)

# Rows 2 and 3 use the same labels and colours as row 1, so only row 1 keeps
# its legend entries.
for row in (2, 3):
    fig.update_traces(showlegend=False, row=row, col=1)

# Row 1 (WoS): y ticks every 50 000, shown with hand-written "k" labels.
tickvals = list(range(0, 220000, 50000))
ticktext = ["0k", "50k", "100k", "150k", "200k"]
fig.update_yaxes(
    title={"text": "WoS papers", "font": {"size": 21}},
    showgrid=True,
    gridcolor="#9D9D9C",
    range=(0, 220000),
    tickvals=tickvals,
    ticktext=ticktext,
    tickformat=".2s",
    row=1,
    col=1,
)
fig.update_xaxes(
    title_text=None,
    linecolor="#9D9D9C",
    range=(1995.5, 2023.5),
    tickfont={"size": 17.9},
    # One tick every three years: 1996, 1999, ..., 2023.
    tickvals=list(range(1996, 2024, 3)),
    row=1,
    col=1,
)

# Rows 2 (DataCite) and 3 (ZENODO) share one y-axis configuration: ticks every
# 500, shown with hand-written "k" labels.
# NOTE(review): the ZENODO panel reuses the DataCite range (0-2020), whereas the
# standalone ZENODO plot uses 0-402. Presumably this is deliberate so both
# dataset panels are drawn to the same scale; confirm.
tickvals = list(range(0, 2020, 500))
ticktext = ["0k", "0.5k", "1.0k", "1.5k", "2.0k"]
for row, axis_title in ((2, "DataCite datasets"), (3, "ZENODO datasets")):
    fig.update_yaxes(
        title={"text": axis_title, "font": {"size": 21}},
        showgrid=True,
        gridcolor="#9D9D9C",
        range=(0, 2020),
        tickvals=tickvals,
        ticktext=ticktext,
        tickformat=".2s",
        row=row,
        col=1,
    )
    fig.update_xaxes(
        title_text=None,
        linecolor="#9D9D9C",
        range=(1995.5, 2023.5),
        tickfont={"size": 17.9},
        # One tick every three years: 1996, 1999, ..., 2023.
        tickvals=list(range(1996, 2024, 3)),
        row=row,
        col=1,
    )

# Light-grey background rectangles, all positioned in the domain coordinates of
# the first subplot's axes ("x domain" / "y domain"). Negative y values lie
# below that subplot's plotting area.
# Each entry: (y0, y1, opacity, layer).
background_bands = [
    (-0.1, 0.05, 0.3, "above"),
    (-0.75, -0.18, 0.25, "below"),
    (-1.41, -0.84, 0.25, "below"),
]
for band_y0, band_y1, band_opacity, band_layer in background_bands:
    fig.add_shape(
        type="rect",
        xref="x domain",
        yref="y domain",
        x0=-0.065,
        y0=band_y0,
        x1=1.02,
        y1=band_y1,
        line_width=0,
        fillcolor="#d9d9d9",
        opacity=band_opacity,
        layer=band_layer,
    )

fig