{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Neural Fingerprints\n",
    "\n",
    "We create atom, bond, and edge tensors from molecule SMILES using `chemml.chem.tensorize_molecules` in order to build neural fingerprints using `chemml.models.NeuralGraphHidden` and `chemml.models.NeuralGraphOutput` modules. These neural fingerprints are then used as features to train a simple feed forward neural network to predict densities of small organic compounds using tensorflow. \n",
    "\n",
    "Here we import a sample dataset from ChemML library which has the SMILES codes for 500 small organic molecules with their densities in $kg/m^3$. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from chemml.datasets import load_organic_density\n",
    "molecules, target, dragon_subset = load_organic_density()\n",
    "target = np.asarray(target['density_Kg/m3'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Building `chemml.chem.Molecule` objects from molecule SMILES. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from chemml.chem import Molecule\n",
    "mol_objs_list = []\n",
    "for smi in molecules['smiles']:\n",
    "    mol = Molecule(smi, 'smiles')\n",
    "    mol.hydrogens('add')\n",
    "    mol.to_xyz('MMFF', maxIters=10000, mmffVariant='MMFF94s')\n",
    "    mol_objs_list.append(mol)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Molecule tensors can be used to create neural graph fingerprints using `chemml.models`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tensorising molecules in batches of 100 ...\n",
      "\u001b[1m500/500\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 16ms/step \n",
      "Merging batch tensors ...    [DONE]\n"
     ]
    }
   ],
   "source": [
    "from chemml.chem import tensorise_molecules\n",
    "xatoms, xbonds, xedges = tensorise_molecules(molecules=mol_objs_list, max_degree=5, \n",
    "                                        max_atoms=None, n_jobs=-1, batch_size=100, verbose=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Splitting and preprocessing the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import ShuffleSplit\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "y_scale = StandardScaler()\n",
    "rs = ShuffleSplit(n_splits=1, test_size=.20, random_state=42)\n",
    "\n",
    "for train, test in rs.split(mol_objs_list):\n",
    "    xatoms_train = xatoms[train]\n",
    "    xatoms_test = xatoms[test]\n",
    "    xbonds_train = xbonds[train]\n",
    "    xbonds_test = xbonds[test]\n",
    "    xedges_train = xedges[train]\n",
    "    xedges_test = xedges[test]\n",
    "    target_train = target[train]\n",
    "    target_test = target[test]\n",
    "    target_train = y_scale.fit_transform(target_train.reshape(-1,1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data:\n",
      "\n",
      "Atoms:  (400, 57, 62)\n",
      "Bonds:  (400, 57, 5, 6)\n",
      "Edges:  (400, 57, 5)\n",
      "Target:  (400, 1)\n",
      "\n",
      "Testing data:\n",
      "\n",
      "Atoms:  (100, 57, 62)\n",
      "Bonds:  (100, 57, 5, 6)\n",
      "Edges:  (100, 57, 5)\n",
      "Target:  (100,)\n"
     ]
    }
   ],
   "source": [
    "print('Training data:\\n')\n",
    "print('Atoms: ',xatoms_train.shape)\n",
    "print('Bonds: ',xbonds_train.shape)\n",
    "print('Edges: ',xedges_train.shape)\n",
    "print('Target: ',target_train.shape)\n",
    "\n",
    "print('\\nTesting data:\\n')\n",
    "print('Atoms: ',xatoms_test.shape)\n",
    "print('Bonds: ',xbonds_test.shape)\n",
    "print('Edges: ',xedges_test.shape)\n",
    "print('Target: ',target_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building the Neural Fingerprints\n",
    "\n",
    "The atom, bond, and edge tensors are used here to build 200 neural fingerprints of width 8 (i.e., the size atomic neighborhood which will be considered in the convolution process). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Neural Fingerprint Shape:  (None, 200)\n"
     ]
    }
   ],
   "source": [
    "from chemml.models import NeuralGraphHidden, NeuralGraphOutput\n",
    "from tensorflow.keras.layers import Input, add\n",
    "import tensorflow as tf\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "conv_width = 8\n",
    "fp_length = 200\n",
    "\n",
    "num_molecules = xatoms_train.shape[0]\n",
    "max_atoms = xatoms_train.shape[1]\n",
    "max_degree = xbonds_train.shape[2]\n",
    "num_atom_features = xatoms_train.shape[-1]\n",
    "num_bond_features = xbonds_train.shape[-1]\n",
    "\n",
    "# Creating input layers for atoms ,bonds and edge information\n",
    "atoms0 = Input(name='atom_inputs', shape=(max_atoms, num_atom_features),batch_size=None)\n",
    "bonds = Input(name='bond_inputs', shape=(max_atoms, max_degree, num_bond_features),batch_size=None)\n",
    "edges = Input(name='edge_inputs', shape=(max_atoms, max_degree), dtype='int32',batch_size=None)\n",
    "\n",
    "# Defining the convolved atom feature layers                                    \n",
    "atoms1 = NeuralGraphHidden(conv_width, activation='relu', use_bias=False)([atoms0, bonds, edges])\n",
    "atoms2 = NeuralGraphHidden(conv_width, activation='relu', use_bias=False)([atoms1, bonds, edges])\n",
    "\n",
    "# Defining the outputs of each (convolved) atom feature layer to fingerprint\n",
    "fp_out0 = NeuralGraphOutput(fp_length, activation='softmax')([atoms0,bonds,edges])\n",
    "fp_out1 = NeuralGraphOutput(fp_length, activation='softmax')([atoms1,bonds,edges])\n",
    "fp_out2 = NeuralGraphOutput(fp_length, activation='softmax')([atoms2,bonds,edges])\n",
    "\n",
    "# Sum outputs to obtain fingerprint                                            \n",
    "final_fp = add([fp_out0, fp_out1, fp_out2])\n",
    "print('Neural Fingerprint Shape: ',final_fp.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building and training the neural network\n",
    "\n",
    "Here, we build and train a simple feed forward neural network using `tensorflow.keras` and provide our neural fingerprints as features. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:TensorFlow GPU support is not available on native Windows for TensorFlow >= 2.11. Even if CUDA/cuDNN are installed, GPU will not be used. Please use WSL2 or the TensorFlow-DirectML plugin.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"functional\"</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1mModel: \"functional\"\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
       "┃<span style=\"font-weight: bold\"> Layer (type)        </span>┃<span style=\"font-weight: bold\"> Output Shape      </span>┃<span style=\"font-weight: bold\">    Param # </span>┃<span style=\"font-weight: bold\"> Connected to      </span>┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
       "│ atom_inputs         │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">57</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">62</span>)    │          <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ -                 │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ bond_inputs         │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">57</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">5</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">6</span>)  │          <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ -                 │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ edge_inputs         │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">57</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">5</span>)     │          <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ -                 │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_hidden │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">57</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">8</span>)     │      <span style=\"color: #00af00; text-decoration-color: #00af00\">2,720</span> │ atom_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NeuralGraphHidden</span>) │                   │            │ bond_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│                     │                   │            │ edge_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_hidde… │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">57</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">8</span>)     │        <span style=\"color: #00af00; text-decoration-color: #00af00\">560</span> │ neural_graph_hid… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NeuralGraphHidden</span>) │                   │            │ bond_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│                     │                   │            │ edge_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_output │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">200</span>)       │     <span style=\"color: #00af00; text-decoration-color: #00af00\">13,800</span> │ atom_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NeuralGraphOutput</span>) │                   │            │ bond_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│                     │                   │            │ edge_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_outpu… │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">200</span>)       │      <span style=\"color: #00af00; text-decoration-color: #00af00\">3,000</span> │ neural_graph_hid… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NeuralGraphOutput</span>) │                   │            │ bond_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│                     │                   │            │ edge_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_outpu… │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">200</span>)       │      <span style=\"color: #00af00; text-decoration-color: #00af00\">3,000</span> │ neural_graph_hid… │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">NeuralGraphOutput</span>) │                   │            │ bond_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>… │\n",
       "│                     │                   │            │ edge_inputs[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ add (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Add</span>)           │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">200</span>)       │          <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ neural_graph_out… │\n",
       "│                     │                   │            │ neural_graph_out… │\n",
       "│                     │                   │            │ neural_graph_out… │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ dense_layer0        │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">128</span>)       │     <span style=\"color: #00af00; text-decoration-color: #00af00\">25,728</span> │ add[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]         │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)             │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ dense_layer1        │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>)        │      <span style=\"color: #00af00; text-decoration-color: #00af00\">8,256</span> │ dense_layer0[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)             │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ main_prediction     │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>)         │         <span style=\"color: #00af00; text-decoration-color: #00af00\">65</span> │ dense_layer1[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">…</span> │\n",
       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)             │                   │            │                   │\n",
       "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n",
       "</pre>\n"
      ],
      "text/plain": [
       "┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓\n",
       "┃\u001b[1m \u001b[0m\u001b[1mLayer (type)       \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape     \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m   Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to     \u001b[0m\u001b[1m \u001b[0m┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩\n",
       "│ atom_inputs         │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m57\u001b[0m, \u001b[38;5;34m62\u001b[0m)    │          \u001b[38;5;34m0\u001b[0m │ -                 │\n",
       "│ (\u001b[38;5;33mInputLayer\u001b[0m)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ bond_inputs         │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m57\u001b[0m, \u001b[38;5;34m5\u001b[0m, \u001b[38;5;34m6\u001b[0m)  │          \u001b[38;5;34m0\u001b[0m │ -                 │\n",
       "│ (\u001b[38;5;33mInputLayer\u001b[0m)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ edge_inputs         │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m57\u001b[0m, \u001b[38;5;34m5\u001b[0m)     │          \u001b[38;5;34m0\u001b[0m │ -                 │\n",
       "│ (\u001b[38;5;33mInputLayer\u001b[0m)        │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_hidden │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m57\u001b[0m, \u001b[38;5;34m8\u001b[0m)     │      \u001b[38;5;34m2,720\u001b[0m │ atom_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
       "│ (\u001b[38;5;33mNeuralGraphHidden\u001b[0m) │                   │            │ bond_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
       "│                     │                   │            │ edge_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_hidde… │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m57\u001b[0m, \u001b[38;5;34m8\u001b[0m)     │        \u001b[38;5;34m560\u001b[0m │ neural_graph_hid… │\n",
       "│ (\u001b[38;5;33mNeuralGraphHidden\u001b[0m) │                   │            │ bond_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
       "│                     │                   │            │ edge_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_output │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m200\u001b[0m)       │     \u001b[38;5;34m13,800\u001b[0m │ atom_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
       "│ (\u001b[38;5;33mNeuralGraphOutput\u001b[0m) │                   │            │ bond_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
       "│                     │                   │            │ edge_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_outpu… │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m200\u001b[0m)       │      \u001b[38;5;34m3,000\u001b[0m │ neural_graph_hid… │\n",
       "│ (\u001b[38;5;33mNeuralGraphOutput\u001b[0m) │                   │            │ bond_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
       "│                     │                   │            │ edge_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ neural_graph_outpu… │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m200\u001b[0m)       │      \u001b[38;5;34m3,000\u001b[0m │ neural_graph_hid… │\n",
       "│ (\u001b[38;5;33mNeuralGraphOutput\u001b[0m) │                   │            │ bond_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m… │\n",
       "│                     │                   │            │ edge_inputs[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ add (\u001b[38;5;33mAdd\u001b[0m)           │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m200\u001b[0m)       │          \u001b[38;5;34m0\u001b[0m │ neural_graph_out… │\n",
       "│                     │                   │            │ neural_graph_out… │\n",
       "│                     │                   │            │ neural_graph_out… │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ dense_layer0        │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m)       │     \u001b[38;5;34m25,728\u001b[0m │ add[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m]         │\n",
       "│ (\u001b[38;5;33mDense\u001b[0m)             │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ dense_layer1        │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m)        │      \u001b[38;5;34m8,256\u001b[0m │ dense_layer0[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
       "│ (\u001b[38;5;33mDense\u001b[0m)             │                   │            │                   │\n",
       "├─────────────────────┼───────────────────┼────────────┼───────────────────┤\n",
       "│ main_prediction     │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m)         │         \u001b[38;5;34m65\u001b[0m │ dense_layer1[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m…\u001b[0m │\n",
       "│ (\u001b[38;5;33mDense\u001b[0m)             │                   │            │                   │\n",
       "└─────────────────────┴───────────────────┴────────────┴───────────────────┘\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">57,129</span> (223.16 KB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m57,129\u001b[0m (223.16 KB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">57,129</span> (223.16 KB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m57,129\u001b[0m (223.16 KB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<keras.src.callbacks.history.History at 0x1e6eaea49e0>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from tensorflow.keras import Model\n",
    "from tensorflow.keras.layers import Dense\n",
    "\n",
    "# Build and compile model for regression.\n",
    "dense_layer0 = Dense(128,activation='relu',name='dense_layer0',\n",
    "                     kernel_regularizer=tf.keras.regularizers.l2(0.01))(final_fp)\n",
    "dense_layer1 = Dense(64,activation='relu',name='dense_layer1',\n",
    "                     kernel_regularizer=tf.keras.regularizers.l2(0.01))(dense_layer0)\n",
    "dense_layer2 = Dense(32,activation='relu',name='dense_layer2',\n",
    "                     kernel_regularizer=tf.keras.regularizers.l2(0.01))(dense_layer1)\n",
    "\n",
    "main_prediction = Dense(1, activation='linear', name='main_prediction')(dense_layer1)\n",
    "model = Model(inputs=[atoms0, bonds, edges], outputs=[main_prediction])\n",
    "model.compile(optimizer='adam', loss='mae')\n",
    "\n",
    "# Show summary\n",
    "model.summary()\n",
    "\n",
    "model.fit([xatoms_train, xbonds_train, xedges_train], target_train, epochs=50,\n",
    "          steps_per_epoch=None, batch_size=None,verbose=False,validation_split=0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Predicting the density of the molecules in our test data and evaluating our model based on it. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1m4/4\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 1s/step   \n",
      "Mean Absolute Error = 15.303 kg/m^3\n",
      "R squared = 0.945\n"
     ]
    }
   ],
   "source": [
    "from chemml.utils import regression_metrics\n",
    "\n",
    "y_pred = model.predict([xatoms_test,xbonds_test,xedges_test])\n",
    "y_pred = y_scale.inverse_transform(y_pred)\n",
    "metrics_df = regression_metrics(target_test, list(y_pred.reshape(-1,)))\n",
    "mae = metrics_df['MAE'].values[0]\n",
    "r_2 = metrics_df['r_squared'].values[0]\n",
    "\n",
    "print(\"Mean Absolute Error = {} kg/m^3\".format(mae.round(3)))\n",
    "print(\"R squared = {}\".format(r_2.round(3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ME</th>\n",
       "      <th>MAE</th>\n",
       "      <th>MSE</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MSLE</th>\n",
       "      <th>RMSLE</th>\n",
       "      <th>MAPE</th>\n",
       "      <th>MaxAPE</th>\n",
       "      <th>RMSPE</th>\n",
       "      <th>MPE</th>\n",
       "      <th>MaxAE</th>\n",
       "      <th>deltaMaxE</th>\n",
       "      <th>r_squared</th>\n",
       "      <th>std</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-6.17789</td>\n",
       "      <td>15.302616</td>\n",
       "      <td>419.635458</td>\n",
       "      <td>20.485006</td>\n",
       "      <td>0.000286</td>\n",
       "      <td>0.016918</td>\n",
       "      <td>1.238328</td>\n",
       "      <td>7.243467</td>\n",
       "      <td>1.715904</td>\n",
       "      <td>-0.526769</td>\n",
       "      <td>72.840308</td>\n",
       "      <td>111.701436</td>\n",
       "      <td>0.945338</td>\n",
       "      <td>87.617825</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        ME        MAE         MSE       RMSE      MSLE     RMSLE      MAPE  \\\n",
       "0 -6.17789  15.302616  419.635458  20.485006  0.000286  0.016918  1.238328   \n",
       "\n",
       "     MaxAPE     RMSPE       MPE      MaxAE   deltaMaxE  r_squared        std  \n",
       "0  7.243467  1.715904 -0.526769  72.840308  111.701436   0.945338  87.617825  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nitin_py312_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.13"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}