Xplainable

Author

Benedict Thekkel

import xplainable as xplainable
import pandas as pd
from sklearn.model_selection import train_test_split
from IPython.display import HTML
from pathlib import Path

from nbdevAuto.functions import * 
import nbdevAuto.functions

name = 'titanic'
path = Path(f'Data/{name}')
kaggle_download(name = name)

file exists

df = pd.read_csv(path/'train.csv')
df.head()

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S

from xplainable.core.models import PartitionedClassifier
from xplainable.core.models import XClassifier
import pandas as pd
from sklearn.model_selection import train_test_split

# Load your data

train, test = train_test_split(df, test_size=0.2)

# Train your model (this will open an embedded gui)
partitioned_model = PartitionedClassifier(partition_on='partition_column')

# Iterate over the unique values in the partition column
for partition in train['partition_column'].unique():
      # Get the data for the partition
      part = train[train['partition_column'] == partition]
      x_train, y_train = part.drop('target', axis=1), part['target']

      # Fit the embedded model
      model = XClassifier()
      model.fit(x, y)

      # Add the model to the partitioned model
      partitioned_model.add_partition(model, partition)

# Prepare the test data
x_test, y_test = test.drop('target', axis=1), test['target']

# Predict on the partitioned model
y_pred = partitioned_model.predict(x_test)

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/indexes/base.py:3802, in Index.get_loc(self, key, method, tolerance)
   3801 try:
-> 3802     return self._engine.get_loc(casted_key)
   3803 except KeyError as err:

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/_libs/index.pyx:138, in pandas._libs.index.IndexEngine.get_loc()

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/_libs/index.pyx:165, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:5745, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:5753, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'partition_column'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[7], line 10
      7 partitioned_model = PartitionedClassifier(partition_on='partition_column')
      9 # Iterate over the unique values in the partition column
---> 10 for partition in train['partition_column'].unique():
     11       # Get the data for the partition
     12       part = train[train['partition_column'] == partition]
     13       x_train, y_train = part.drop('target', axis=1), part['target']

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/frame.py:3807, in DataFrame.__getitem__(self, key)
   3805 if self.columns.nlevels > 1:
   3806     return self._getitem_multilevel(key)
-> 3807 indexer = self.columns.get_loc(key)
   3808 if is_integer(indexer):
   3809     indexer = [indexer]

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/indexes/base.py:3804, in Index.get_loc(self, key, method, tolerance)
   3802     return self._engine.get_loc(casted_key)
   3803 except KeyError as err:
-> 3804     raise KeyError(key) from err
   3805 except TypeError:
   3806     # If we have a listlike key, _check_indexing_error will raise
   3807     #  InvalidIndexError. Otherwise we fall through and re-raise
   3808     #  the TypeError.
   3809     self._check_indexing_error(key)

KeyError: 'partition_column'

pp = xp.Preprocessor()
pp.preprocess(df)

df = pd.read_csv('preprocessed_1695571534.csv')
df.head()

# Train a model
model = xp.classifier(df)