Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 118 additions & 66 deletions examples/00_hello_foundry/hello_foundry.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": "from foundry import Foundry\n\n# Create a Foundry client (uses HTTPS download by default)\n# For cloud environments (Colab, etc.), add: no_browser=True, no_local_server=True\nf = Foundry()"
"source": [
"from foundry import Foundry\n",
"\n",
"# Create a Foundry client (uses HTTPS download by default)\n",
"# For cloud environments (Colab, etc.), add: no_browser=True, no_local_server=True\n",
"f = Foundry()"
]
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -99,59 +105,19 @@
" <td>root=2022</td>\n",
" <td>10.18126/jos5-wj65</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>foundry_assorted_computational_band_gaps_v1.1</td>\n",
" <td>Graph Network Based Deep Learning of Band Gaps...</td>\n",
" <td>root=2021</td>\n",
" <td>10.18126/7io9-1z9k</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>foundry_experimental_band_gaps_v1.1</td>\n",
" <td>Graph Network Based Deep Learning of Band Gaps...</td>\n",
" <td>root=2021</td>\n",
" <td>10.18126/wg3u-g8vu</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>foundry_aflow_band_gaps_v1.1</td>\n",
" <td>Graph Network Based Deep Learning of Band Gaps...</td>\n",
" <td>root=2021</td>\n",
" <td>10.18126/6fdy-bsam</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>foundry_oqmd_band_gaps_v1.1</td>\n",
" <td>Graph Network Based Deep Learning of Band Gaps...</td>\n",
" <td>root=2021</td>\n",
" <td>10.18126/w1ey-9y8b</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dataset_name \\\n",
"0 foundry_g4mp2_solvation_v1.2 \n",
"1 foundry_assorted_computational_band_gaps_v1.1 \n",
"2 foundry_experimental_band_gaps_v1.1 \n",
"3 foundry_aflow_band_gaps_v1.1 \n",
"4 foundry_oqmd_band_gaps_v1.1 \n",
" dataset_name \\\n",
"0 foundry_g4mp2_solvation_v1.2 \n",
"\n",
" title year \\\n",
"0 DFT Estimates of Solvation Energy in Multiple ... root=2022 \n",
"1 Graph Network Based Deep Learning of Band Gaps... root=2021 \n",
"2 Graph Network Based Deep Learning of Band Gaps... root=2021 \n",
"3 Graph Network Based Deep Learning of Band Gaps... root=2021 \n",
"4 Graph Network Based Deep Learning of Band Gaps... root=2021 \n",
"\n",
" DOI FoundryDataset \n",
"0 10.18126/jos5-wj65 <foundry.foundry_dataset.FoundryDataset object... \n",
"1 10.18126/7io9-1z9k <foundry.foundry_dataset.FoundryDataset object... \n",
"2 10.18126/wg3u-g8vu <foundry.foundry_dataset.FoundryDataset object... \n",
"3 10.18126/6fdy-bsam <foundry.foundry_dataset.FoundryDataset object... \n",
"4 10.18126/w1ey-9y8b <foundry.foundry_dataset.FoundryDataset object... "
"0 10.18126/jos5-wj65 <foundry.foundry_dataset.FoundryDataset object... "
]
},
"execution_count": 3,
Expand Down Expand Up @@ -187,7 +153,7 @@
"<h2>DFT Estimates of Solvation Energy in Multiple Solvents</h2>Ward, Logan; Dandu, Naveen; Blaiszik, Ben; Narayanan, Badri; Assary, Rajeev S.; Redfern, Paul C.; Foster, Ian; Curtiss, Larry A.<p>DOI: 10.18126/jos5-wj65</p><h3>Dataset</h3><table><tr><th>short_name</th><td>g4mp2_solvation</td></tr><tr><th>data_type</th><td>tabular</td></tr><tr><th>task_type</th><td><ul><li>supervised</li></ul></td></tr><tr><th>domain</th><td><ul><li>materials science</li><li>chemistry</li></ul></td></tr><tr><th>n_items</th><td>130258.0</td></tr><tr><th>splits</th><td><ul><li><table><tr><th>type</th><td>train</td></tr><tr><th>path</th><td>g4mp2_data.json</td></tr><tr><th>label</th><td>train</td></tr></table></li></ul></td></tr><tr><th>keys</th><td><table><tr><th>key</th><th>type</th><th>filter</th><th>description</th><th>units</th><th>classes</th></tr><tr><td><ul><li>smiles_0</li></ul></td><td>input</td><td></td><td>Input SMILES string</td><td></td><td></td></tr><tr><td><ul><li>smiles_1</li></ul></td><td>input</td><td></td><td>SMILES string after relaxation</td><td></td><td></td></tr><tr><td><ul><li>inchi_0</li></ul></td><td>input</td><td></td><td>InChi after generating coordinates with CORINA</td><td></td><td></td></tr><tr><td><ul><li>inchi_1</li></ul></td><td>input</td><td></td><td>InChi after relaxation</td><td></td><td></td></tr><tr><td><ul><li>xyz</li></ul></td><td>input</td><td></td><td>InChi after relaxation</td><td>XYZ coordinates after relaxation</td><td></td></tr><tr><td><ul><li>atomic_charges</li></ul></td><td>input</td><td></td><td>Atomic charges on each atom, as predicted from B3LYP</td><td></td><td></td></tr><tr><td><ul><li>A</li></ul></td><td>input</td><td></td><td>Rotational constant, A</td><td>GHz</td><td></td></tr><tr><td><ul><li>B</li></ul></td><td>input</td><td></td><td>Rotational constant, B</td><td>GHz</td><td></td></tr><tr><td><ul><li>C</li></ul></td><td>input</td><td></td><td>Rotational constant, C</td><td>GHz</td><td></td></tr><tr><td><ul><li>inchi_1</li></ul></td><td>input</td><td></td><td>InChi after relaxation</td><td></td><td></td></tr><tr><td><ul><li>n_electrons</li></ul></td><td>input</td><td></td><td>Number of electrons</td><td></td><td></td></tr><tr><td><ul><li>n_heavy_atoms</li></ul></td><td>input</td><td></td><td>Number of non-hydrogen atoms</td><td></td><td></td></tr><tr><td><ul><li>n_atom</li></ul></td><td>input</td><td></td><td>Number of atoms in molecule</td><td></td><td></td></tr><tr><td><ul><li>mu</li></ul></td><td>input</td><td></td><td>Dipole moment</td><td>D</td><td></td></tr><tr><td><ul><li>alpha</li></ul></td><td>input</td><td></td><td>Isotropic polarizability</td><td>a_0^3</td><td></td></tr><tr><td><ul><li>R2</li></ul></td><td>input</td><td></td><td>Electronic spatial extant</td><td>a_0^2</td><td></td></tr><tr><td><ul><li>cv</li></ul></td><td>input</td><td></td><td>Heat capacity at 298.15K</td><td>cal/mol-K</td><td></td></tr><tr><td><ul><li>g4mp2_hf298</li></ul></td><td>target</td><td></td><td>G4MP2 Standard Enthalpy of Formation, 298K</td><td>kcal/mol</td><td></td></tr><tr><td><ul><li>bandgap</li></ul></td><td>input</td><td></td><td>B3LYP Band gap energy</td><td>Ha</td><td></td></tr><tr><td><ul><li>homo</li></ul></td><td>input</td><td></td><td>B3LYP Energy of HOMO</td><td>Ha</td><td></td></tr><tr><td><ul><li>lumo</li></ul></td><td>input</td><td></td><td>B3LYP Energy of LUMO</td><td>Ha</td><td></td></tr><tr><td><ul><li>zpe</li></ul></td><td>input</td><td></td><td>B3LYP Zero point vibrational energy</td><td>Ha</td><td></td></tr><tr><td><ul><li>u0</li></ul></td><td>input</td><td></td><td>B3LYP Internal energy at 0K</td><td>Ha</td><td></td></tr><tr><td><ul><li>u</li></ul></td><td>input</td><td></td><td>B3LYP Internal energy at 298.15K</td><td>Ha</td><td></td></tr><tr><td><ul><li>h</li></ul></td><td>input</td><td></td><td>B3LYP Enthalpy at 298.15K</td><td>Ha</td><td></td></tr><tr><td><ul><li>u0_atom</li></ul></td><td>input</td><td></td><td>B3LYP atomization energy at 0K</td><td>Ha</td><td></td></tr><tr><td><ul><li>g</li></ul></td><td>input</td><td></td><td>B3LYP Free energy at 298.15K</td><td>Ha</td><td></td></tr><tr><td><ul><li>g4mp2_0k</li></ul></td><td>target</td><td></td><td>G4MP2 Internal energy at 0K</td><td>Ha</td><td></td></tr><tr><td><ul><li>g4mp2_energy</li></ul></td><td>target</td><td></td><td>G4MP2 Internal energy at 298.15K</td><td>Ha</td><td></td></tr><tr><td><ul><li>g4mp2_enthalpy</li></ul></td><td>target</td><td></td><td>G4MP2 Enthalpy at 298.15K</td><td>Ha</td><td></td></tr><tr><td><ul><li>g4mp2_free</li></ul></td><td>target</td><td></td><td>G4MP2 Free eergy at 0K</td><td>Ha</td><td></td></tr><tr><td><ul><li>g4mp2_atom</li></ul></td><td>target</td><td></td><td>G4MP2 atomization energy at 0K</td><td>Ha</td><td></td></tr><tr><td><ul><li>sol_acetone</li></ul></td><td>target</td><td></td><td>Solvation energy, acetone</td><td>kcal/mol</td><td></td></tr><tr><td><ul><li>sol_acn</li></ul></td><td>target</td><td></td><td>Solvation energy, acetonitrile</td><td>kcal/mol</td><td></td></tr><tr><td><ul><li>sol_dmso</li></ul></td><td>target</td><td></td><td>Solvation energy, dimethyl sulfoxide</td><td>kcal/mol</td><td></td></tr><tr><td><ul><li>sol_ethanol</li></ul></td><td>target</td><td></td><td>Solvation energy, ethanol</td><td>kcal/mol</td><td></td></tr><tr><td><ul><li>sol_water</li></ul></td><td>target</td><td></td><td>Solvation energy, water</td><td>kcal/mol</td><td></td></tr></table></td></tr></table>"
],
"text/plain": [
"<foundry.foundry_dataset.FoundryDataset at 0x1342b8230>"
"<foundry.foundry_dataset.FoundryDataset at 0x140201070>"
]
},
"execution_count": 4,
Expand All @@ -214,10 +180,70 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": "# Get the schema - what columns/fields are in this dataset?\nschema = dataset.get_schema()\n\nprint(f\"Dataset: {schema['name']}\")\nprint(f\"Data Type: {schema['data_type']}\")\nprint(f\"\\nSplits: {[s['name'] for s in schema['splits']]}\")\nprint(f\"\\nFields:\")\nfor field in schema['fields']:\n print(f\" - {field['name']} ({field['role']}): {field['description'] or 'No description'}\")"
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset: foundry_g4mp2_solvation_v1.2\n",
"Data Type: tabular\n",
"\n",
"Splits: ['train']\n",
"\n",
"Fields:\n",
" - smiles_0 (input): Input SMILES string\n",
" - smiles_1 (input): SMILES string after relaxation\n",
" - inchi_0 (input): InChi after generating coordinates with CORINA\n",
" - inchi_1 (input): InChi after relaxation\n",
" - xyz (input): InChi after relaxation\n",
" - atomic_charges (input): Atomic charges on each atom, as predicted from B3LYP\n",
" - A (input): Rotational constant, A\n",
" - B (input): Rotational constant, B\n",
" - C (input): Rotational constant, C\n",
" - inchi_1 (input): InChi after relaxation\n",
" - n_electrons (input): Number of electrons\n",
" - n_heavy_atoms (input): Number of non-hydrogen atoms\n",
" - n_atom (input): Number of atoms in molecule\n",
" - mu (input): Dipole moment\n",
" - alpha (input): Isotropic polarizability\n",
" - R2 (input): Electronic spatial extant\n",
" - cv (input): Heat capacity at 298.15K\n",
" - g4mp2_hf298 (target): G4MP2 Standard Enthalpy of Formation, 298K\n",
" - bandgap (input): B3LYP Band gap energy\n",
" - homo (input): B3LYP Energy of HOMO\n",
" - lumo (input): B3LYP Energy of LUMO\n",
" - zpe (input): B3LYP Zero point vibrational energy\n",
" - u0 (input): B3LYP Internal energy at 0K\n",
" - u (input): B3LYP Internal energy at 298.15K\n",
" - h (input): B3LYP Enthalpy at 298.15K\n",
" - u0_atom (input): B3LYP atomization energy at 0K\n",
" - g (input): B3LYP Free energy at 298.15K\n",
" - g4mp2_0k (target): G4MP2 Internal energy at 0K\n",
" - g4mp2_energy (target): G4MP2 Internal energy at 298.15K\n",
" - g4mp2_enthalpy (target): G4MP2 Enthalpy at 298.15K\n",
" - g4mp2_free (target): G4MP2 Free eergy at 0K\n",
" - g4mp2_atom (target): G4MP2 atomization energy at 0K\n",
" - sol_acetone (target): Solvation energy, acetone\n",
" - sol_acn (target): Solvation energy, acetonitrile\n",
" - sol_dmso (target): Solvation energy, dimethyl sulfoxide\n",
" - sol_ethanol (target): Solvation energy, ethanol\n",
" - sol_water (target): Solvation energy, water\n"
]
}
],
"source": [
"# Get the schema - what columns/fields are in this dataset?\n",
"schema = dataset.get_schema()\n",
"\n",
"print(f\"Dataset: {schema['name']}\")\n",
"print(f\"Data Type: {schema['data_type']}\")\n",
"print(f\"\\nSplits: {[s['name'] for s in schema['splits']]}\")\n",
"print(f\"\\nFields:\")\n",
"for field in schema['fields']:\n",
" print(f\" - {field['name']} ({field['role']}): {field['description'] or 'No description'}\")"
]
},
{
"cell_type": "markdown",
Expand All @@ -230,22 +256,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 100%|█████████████████████████████████| 1/1 [00:00<00:00, 3266.59it/s]\n",
"Transferring data: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error: GC_NOT_CONNECTED - globus connect offline\n"
"Data keys: dict_keys(['train'])\n"
]
}
],
Expand All @@ -259,9 +277,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training data shape: <class 'tuple'>\n",
"\n",
"Inputs (X): <class 'pandas.core.frame.DataFrame'>\n",
"Targets (y): <class 'pandas.core.frame.DataFrame'>\n"
]
}
],
"source": [
"# For ML datasets, data is typically split into inputs (X) and targets (y)\n",
"# Let's explore the training split\n",
Expand All @@ -287,9 +316,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Foundry works with PyTorch and TensorFlow out of the box!\n"
]
}
],
"source": [
"# For PyTorch users:\n",
"# torch_dataset = dataset.get_as_torch(split='train')\n",
Expand All @@ -314,9 +351,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"@misc{https://doi.org/10.18126/jos5-wj65\n",
"doi = {10.18126/jos5-wj65}\n",
"url = {https://doi.org/10.18126/jos5-wj65}\n",
"author = {Ward, Logan and Dandu, Naveen and Blaiszik, Ben and Narayanan, Badri and Assary, Rajeev S. and Redfern, Paul C. and Foster, Ian and Curtiss, Larry A.}\n",
"title = {DFT Estimates of Solvation Energy in Multiple Solvents}\n",
"keywords = {machine learning, foundry}\n",
"publisher = {Materials Data Facility}\n",
"year = {root=2022}}\n"
]
}
],
"source": [
"# Get BibTeX citation\n",
"citation = dataset.get_citation()\n",
Expand Down Expand Up @@ -368,4 +420,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Loading
Loading