Prerequisites
- A ClientId and ClientSecret provided by the Ginkgo Biosecurity team
- A machine with Python 3 and the requests library installed (used for making HTTP requests)
Add the ClientId and ClientSecret to your environment:
Copy
# Replace the placeholders with the credentials you were issued; api_client.py reads both variables via os.environ
export CLIENT_ID="YOUR_CLIENT_ID"
export CLIENT_SECRET="YOUR_CLIENT_SECRET"
Create an ApiClient class
Put the following code into a file called api_client.py:
Copy
import os
from typing import Iterator, Optional
import requests
class ApiClient:
    """Small HTTP client for the Ginkgo Biosecurity API.

    Credentials are read from the ``CLIENT_ID`` and ``CLIENT_SECRET``
    environment variables and exchanged for a bearer token using the OAuth
    ``client_credentials`` grant. The token is then sent on every API request.
    """

    def __init__(self, verbose: bool = False, timeout: Optional[float] = 60.0) -> None:
        """Read credentials from the environment and set the service endpoints.

        Args:
            verbose: When true, print each request and its status code.
            timeout: Seconds to wait on the network for each request before
                giving up. ``None`` waits indefinitely.

        Raises:
            KeyError: If ``CLIENT_ID`` or ``CLIENT_SECRET`` is not set.
        """
        self.verbose = verbose
        self.timeout = timeout
        self.client_id = os.environ["CLIENT_ID"]
        self.client_secret = os.environ["CLIENT_SECRET"]
        self.idp_domain = "sso.ginkgobiosecurity.com"
        self.api_audience = "https://api.ginkgobiosecurity.com"
        self.api_gateway_url = "https://api.ginkgobiosecurity.com"
        self.authorization_token = None

    def authenticate(self) -> None:
        """Fetch a bearer token and store it in ``self.authorization_token``.

        POSTs the client id and client secret to the identity provider's
        ``/oauth/token`` endpoint.

        Raises:
            Exception: If the identity provider answers with an HTTP status
                of 400 or above (for example, rejected credentials).
        """
        data = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "audience": self.api_audience,
            "grant_type": "client_credentials",
        }
        headers = {
            "content-type": "application/json",
        }
        idp_token_url = f"https://{self.idp_domain}/oauth/token"
        response = requests.post(
            idp_token_url, headers=headers, json=data, timeout=self.timeout
        )
        # Fail here with the server's message rather than with an opaque
        # KeyError on "access_token" a few lines later.
        if response.status_code >= 400:
            raise Exception(f"Error: {response.status_code} {response.text}")
        token_data = response.json()
        self.authorization_token = token_data["access_token"]

    def get_resource(self, path: str, params: Optional[dict] = None) -> Optional[dict]:
        """GET ``path`` from the API gateway and return the decoded JSON body.

        Args:
            path: Resource path; one leading ``/`` is stripped if present.
            params: Optional query-string parameters.

        Returns:
            Whatever ``response.json()`` decodes the body to.

        Raises:
            Exception: If the response status is 400 or above.
        """
        # Without a token the request would go out as "Bearer None" and be
        # rejected, so obtain one on first use.
        if self.authorization_token is None:
            self.authenticate()
        headers = {
            "Authorization": f"Bearer {self.authorization_token}",
        }
        if path.startswith("/"):
            path = path[1:]
        resource_url: str = f"{self.api_gateway_url}/{path}"
        if self.verbose:
            # Printed before the call so a slow or hung request is identifiable.
            print("GET", resource_url, params)
        response = requests.get(
            resource_url, params=params, headers=headers, timeout=self.timeout
        )
        if self.verbose:
            print(response.status_code)
        if response.status_code >= 400:
            print(f"Error: {response.status_code} {response.text}")
            raise Exception(f"Error: {response.status_code} {response.text}")
        return response.json()

    def get_all_records_v1(self, path: str, params: Optional[dict] = None) -> Iterator[list]:
        """Yield pages of records from a v1 (offset/num) paginated endpoint.

        One page is fetched per iteration, so the full dataset is never held
        in memory at once.

        Args:
            path: Resource path passed to ``get_resource``.
            params: Extra query parameters. ``num`` (page size, default 1000)
                and ``offset`` (starting offset, default 0) are honoured if
                present; ``params`` itself is not modified.

        Yields:
            list: The records of a single page.

        Raises:
            ValueError: On first iteration, if ``num`` is not positive.
        """
        query = dict(params) if params else {}
        # Take the page size and starting offset from the caller once. The
        # loop must compare against, and advance by, the page size that is
        # actually sent, otherwise a custom ``num`` ends pagination early.
        num = int(query.pop("num", 1000))
        offset = int(query.pop("offset", 0))
        if num <= 0:
            raise ValueError(f"num must be a positive integer, got {num}")
        while True:
            pagination_params = {"offset": offset, "num": num, **query}
            # Get the current page of results
            page_data = self.get_resource(path, pagination_params)
            if page_data is None:
                print(f"Error retrieving data at offset {offset}. Stopping.")
                break  # Exit on error
            if not page_data:
                if self.verbose:
                    print("Received an empty page. Assuming end of data.")
                break  # No more records to fetch
            if self.verbose:
                print(f"Yielding page with {len(page_data)} records from offset {offset}")
            yield page_data
            # A short page means the server has nothing further to return.
            if len(page_data) < num:
                break
            # Move to the next page
            offset += num

    def get_all_records_v2(self, path: str, params: Optional[dict] = None) -> Iterator[list]:
        """Yield pages of records from a v2 (cursor) paginated endpoint.

        Each response is expected to be a mapping with ``data``, ``has_more``
        and ``next_page`` entries; ``next_page`` is sent back as ``page`` on
        the following request.

        Args:
            path: Resource path passed to ``get_resource``.
            params: Extra query parameters. ``num`` (page size, default 1000)
                and ``page`` (starting cursor) are honoured if present;
                ``params`` itself is not modified.

        Yields:
            list: The ``data`` entry of a single page.
        """
        query = dict(params) if params else {}
        # A caller-supplied ``page`` is only the starting cursor; re-applying
        # it on every request would fetch the same page forever.
        page = query.pop("page", None)
        num = query.pop("num", 1000)
        while True:
            pagination_params = {"page": page, "num": num, **query}
            # Get the current page of results
            payload = self.get_resource(path, pagination_params)
            if payload is None:
                print(f"Error retrieving data at page {page}. Stopping.")
                break  # Exit on error
            data = payload["data"]
            has_more = payload["has_more"]
            if not data:
                if self.verbose:
                    print("Received an empty page. Assuming end of data.")
                break  # No more records to fetch
            if self.verbose:
                print(f"Yielding page with {len(data)} records from page {page}")
            yield data
            # The server reports whether further pages exist.
            if not has_more:
                break
            next_page = payload.get("next_page")
            # If more data is promised but the cursor is missing or unchanged,
            # stop rather than request the same page indefinitely.
            if next_page is None or next_page == page:
                break
            # Move to the next page
            page = next_page
Invoke the ApiClient
After creating an api_client.py file as described above, you can import it and authenticate:
Copy
# ApiClient() reads CLIENT_ID and CLIENT_SECRET from the environment, so export them first
from api_client import ApiClient
# verbose=True prints each GET request (URL and params) and its response status code
client = ApiClient(verbose=True)
# Obtain the bearer token that later requests send in the Authorization header
client.authenticate()
Copy
# Each call returns the decoded JSON body and raises an Exception on an HTTP status of 400 or above
client.get_resource(path="/v2/hed-outbreaks/event-names")
client.get_resource(path="/v2/hed-outbreaks/events/pathogen-names")
Use get_all_records_v2(...) to retrieve all records from a paginated API:
Copy
# get_all_records_v2 returns a generator: no request is made until it is iterated,
# and each iteration yields one page (a list of records).
generator = client.get_all_records_v2(path="/v2/active-outbreaks/outbreak-summaries")
for page in generator:
    print(f"Got {len(page)} records")