SDK Tutorial

Customer walkthrough for using `OmClient` with datasets, diligence workflows, jobs, and data loading.

Install and authenticate

Install packages (Bash)
# Install the omtx package; the snippets below import OmClient from it.
pip install omtx
Set API key and run a smoke test (Bash)
# "sk_..." is a placeholder; substitute your own API key.
export OMTX_API_KEY="sk_..."
# Feed an inline Python script to the interpreter on stdin. The quoted 'PY'
# delimiter stops the shell from expanding anything inside the script.
python - <<'PY'
from omtx import OmClient

# OmClient() is constructed without arguments here; presumably it picks up
# OMTX_API_KEY from the environment -- TODO confirm against the SDK docs.
with OmClient() as client:
    # Smoke test: print the results of the status and user-profile calls.
    print(client.status())
    print(client.users.profile())
PY

Dataset catalog and generation visibility

Read public + generated dataset context (Python)
from omtx import OmClient

# Fetch the catalog once; only the returned value (not the client) is used
# after the `with` block exits.
with OmClient() as client:
    catalog = client.datasets.catalog()

# The response is indexed like a nested mapping: "catalog" and
# "data_generated" each expose a "count", while the two "accessible_*"
# entries are sliced to preview at most their first five values.
print("Public catalog rows:", catalog["catalog"]["count"])
print("Data-generated rows:", catalog["data_generated"]["count"])
print("Generated protein UUIDs:", catalog["accessible_generated_protein_uuids"][:5])
print("Accessible dataset IDs:", catalog["accessible_dataset_ids"][:5])

Customer use cases

  • Explore listings: render catalog.items[] for public datasets.
  • Generated visibility: render data_generated.items[] plus accessible_generated_protein_uuids[] for generated proteins available now.
  • Access control: use accessible_dataset_ids[] to enable or disable download/export controls.

Load dataframes from protein UUID

Combined loading for model training, single call (Python)
from omtx import OmClient

# One load_data() call returns both pools for a single protein UUID.
# NOTE(review): nonbinder_multiplier=5 presumably requests five non-binders
# per binder, and sample_seed presumably makes the sampling reproducible --
# confirm both against the SDK reference.
with OmClient() as client:
    loaded = client.load_data(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
        binders=50000,
        nonbinder_multiplier=5,
        sample_seed=42,
    )

# The result is indexed by pool name; each pool supports len() and show().
binders = loaded["binders"]
nonbinders = loaded["nonbinders"]
print("Binder rows:", len(binders))
print("Non-binder rows:", len(nonbinders))
binders.show(top_n=24)  # defaults: smiles + binding_score
Separate pool loading for explicit control (Python)
from omtx import OmClient

# Load each pool with its own call so the row count (n) is chosen per pool.
# Both calls use the same protein UUID and the same sample_seed.
with OmClient() as client:
    binders = client.load_binders(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
        n=1000,
        sample_seed=42,
    )
    nonbinders = client.load_nonbinders(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
        n=10000,
        sample_seed=42,
    )
# Omit n (or set n=None) to load the full pool.

# The returned objects support len(), expose .columns, and provide show().
print("Binder rows:", len(binders))
print("Non-binder rows:", len(nonbinders))
print("Binder columns:", binders.columns)
# show() renders inline in notebooks; no extra display() wrapper needed.
binders.show(top_n=24)  # defaults: smiles + binding_score
# Same preview, ordered by a named column instead of the default.
binders.show(top_n=24, sort_by="selectivity_score")
Manual shard export, advanced (Python)
from omtx import OmClient

# Request shard URLs for one protein instead of loading dataframes directly.
with OmClient() as client:
    urls = client.binders.urls(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
    )

# The response carries an expiry value plus two URL lists, one per pool.
print("URLs expire at:", urls["expires_at"])
print("Binder shard count:", len(urls["binder_urls"]))
print("Non-binder shard count:", len(urls["non_binder_urls"]))
# NOTE(review): indexing [0] raises IndexError if no binder shards are
# returned -- confirm the API guarantees at least one, or guard this line.
print("First binder URL:", urls["binder_urls"][0])

Run diligence workflows

Search, gather, and crawl, asynchronous (Python)
from omtx import OmClient

with OmClient() as client:
    # Submit all three jobs first; each call returns a mapping that includes
    # the "job_id" used for waiting below.
    search_job = client.diligence.search(query="PARP inhibitor resistance mechanisms")
    gather_job = client.diligence.gather(
        query="EGFR inhibitor clinical evidence",
        preset="quick",
    )
    crawl_job = client.diligence.crawl(
        url="https://example.org/egfr-review",
        preset="quick",
    )

    # Wait on the jobs one after another, each with its own timeout.
    # NOTE(review): poll_interval and timeout are presumably in seconds --
    # confirm against the SDK reference.
    search_result = client.jobs.wait(search_job["job_id"], poll_interval=5, timeout=1800)
    gather_result = client.jobs.wait(gather_job["job_id"], poll_interval=5, timeout=1800)
    crawl_result = client.jobs.wait(crawl_job["job_id"], poll_interval=5, timeout=1800)

# Each wait() result exposes a "status" entry.
print("Search status:", search_result["status"])
print("Gather status:", gather_result["status"])
print("Crawl status:", crawl_result["status"])
Deep diligence and report synthesis (Python)
from omtx import OmClient

with OmClient() as client:
    # Submit a deep-diligence job, then block until jobs.wait() returns.
    dd_job = client.diligence.deep_diligence(
        query="BRAF inhibitor resistance landscape",
        preset="quick",
    )
    # result_endpoint is a plain string (not an f-string), so "{job_id}" is
    # passed literally; presumably the SDK substitutes the ID -- TODO confirm.
    dd_result = client.jobs.wait(
        dd_job["job_id"],
        result_endpoint="/v2/jobs/deep-diligence/{job_id}",
        poll_interval=5,
        timeout=1800,
    )

    # The synthesis job is submitted only after the deep-diligence wait ends.
    synth_job = client.diligence.synthesize_report(gene_key="brd4")
    # NOTE(review): this path uses camelCase ("synthesizeReport") while the
    # one above is hyphenated ("deep-diligence") -- verify both against the
    # API reference.
    synth_result = client.jobs.wait(
        synth_job["job_id"],
        result_endpoint="/v2/jobs/synthesizeReport/{job_id}",
        poll_interval=5,
        timeout=1800,
    )

# The deep-diligence result nests its claim count under "result"; the
# synthesis result is only inspected for its top-level keys.
print("Deep diligence claims:", dd_result["result"]["total_claims"])
print("Synthesis keys:", list(synth_result.keys()))

Function map for API-key users

Available SDK functions (Python)
from omtx import OmClient

# Reference listing of the client calls used in this tutorial, grouped by
# area. Return values are discarded here (apart from the one chained show()),
# and "artifact-id" / "job_123" are placeholder identifiers.
with OmClient() as client:
    # Health and account
    client.status()
    client.users.profile()

    # Datasets and shard exports
    client.datasets.catalog()
    client.load_data(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
        binders=1000,
        nonbinder_multiplier=5,
        sample_seed=42,
    )
    client.load_binders(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
        n=1000,
        sample_seed=42,
    ).show(top_n=24)  # default columns: smiles + binding_score
    client.load_nonbinders(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
        n=10000,
        sample_seed=42,
    )
    client.binders.urls(
        protein_uuid="550e8400-e29b-41d4-a716-446655440000",
    )

    # Diligence workflows
    client.diligence.search(query="example")
    client.diligence.gather(query="example", preset="quick")
    client.diligence.crawl(url="https://example.org", preset="quick")
    client.diligence.deep_diligence(query="example", preset="quick")
    client.diligence.synthesize_report(gene_key="brd4")
    client.diligence.list_gene_keys()

    # Artifacts and Hub workflows
    client.artifacts.upload("target.pdb")
    client.artifacts.get("artifact-id")
    # NOTE(review): hub.submit(job_type="hub.diffdock", payload=...) and
    # hub.diffdock(...) take the same two fields; presumably the latter is a
    # typed shortcut for the former -- confirm against the SDK reference.
    client.hub.submit(
        job_type="hub.diffdock",
        payload={
            "protein_artifact_id": "artifact-id",
            "ligand_smiles": "CCO",
        },
    )
    client.hub.diffdock(
        protein_artifact_id="artifact-id",
        ligand_smiles="CCO",
    )

    # Jobs
    client.jobs.history(limit=20, job_type="hub.diffdock")
    client.jobs.status("job_123")
    client.jobs.wait("job_123", poll_interval=5, timeout=1800)

Reliability patterns

Explicit idempotency and timeout handling (Python)
from omtx import JobTimeoutError, OMTXError, OmClient

with OmClient() as client:
    # Keep the job ID in a name bound before the try body so the timeout
    # handler can always report it; it stays None if submission itself fails.
    job_id = None
    try:
        # A fixed idempotency_key accompanies the submission; presumably a
        # retry with the same key does not create a duplicate job -- TODO
        # confirm against the SDK reference.
        job = client.diligence.deep_diligence(
            query="KRAS mutation treatment pathways",
            preset="quick",
            idempotency_key="dd-kras-2026-02-25",
        )
        job_id = job["job_id"]
        result = client.jobs.wait(job_id, timeout=900)
        print(result["status"])
    except JobTimeoutError:
        # Print the ID: without it the reader cannot follow the advice to
        # resume with jobs.status(job_id).
        print("Job", job_id, "did not finish before timeout; resume later with jobs.status(job_id).")
    except OMTXError as exc:
        print("SDK/API error:", exc)