Xplainable

Xplainable
Author

Benedict Thekkel

import xplainable as xplainable
import pandas as pd
from sklearn.model_selection import train_test_split
from IPython.display import HTML
from pathlib import Path
from nbdevAuto.functions import * 
import nbdevAuto.functions
# Dataset identifier; also used as the sub-folder name under Data/
name = 'titanic'
path = Path('Data') / name

# Fetch the dataset via the nbdevAuto helper
kaggle_download(name=name)
file exists
# Read the training split from the dataset folder and preview the first rows
train_csv = path / 'train.csv'
df = pd.read_csv(train_csv)
df.head()
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
from xplainable.core.models import PartitionedClassifier
from xplainable.core.models import XClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
# Column names for the Titanic data: 'Survived' is the label and 'Sex' is
# the column used to split the rows into partitions. The placeholder names
# 'partition_column' / 'target' are not columns of df and raise KeyError.
partition_col = 'Sex'
target_col = 'Survived'

# Hold out 20% of the rows for testing
train, test = train_test_split(df, test_size=0.2)

# Create the partitioned model, keyed on the partition column
partitioned_model = PartitionedClassifier(partition_on=partition_col)

# Fit one XClassifier per unique value of the partition column
for partition in train[partition_col].unique():
    # Rows belonging to this partition
    part = train[train[partition_col] == partition]
    x_train, y_train = part.drop(target_col, axis=1), part[target_col]

    # Fit the embedded model on this partition's data.
    # (Previously model.fit(x, y), which referenced undefined names.)
    # NOTE(review): the raw frame still contains free-text and NaN columns
    # (Name, Ticket, Cabin) -- confirm XClassifier accepts them or
    # preprocess first.
    model = XClassifier()
    model.fit(x_train, y_train)

    # Add the model to the partitioned model
    partitioned_model.add_partition(model, partition)

# Prepare the test data
x_test, y_test = test.drop(target_col, axis=1), test[target_col]

# Predict on the partitioned model
y_pred = partitioned_model.predict(x_test)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/indexes/base.py:3802, in Index.get_loc(self, key, method, tolerance)
   3801 try:
-> 3802     return self._engine.get_loc(casted_key)
   3803 except KeyError as err:

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/_libs/index.pyx:138, in pandas._libs.index.IndexEngine.get_loc()

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/_libs/index.pyx:165, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:5745, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:5753, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'partition_column'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[7], line 10
      7 partitioned_model = PartitionedClassifier(partition_on='partition_column')
      9 # Iterate over the unique values in the partition column
---> 10 for partition in train['partition_column'].unique():
     11       # Get the data for the partition
     12       part = train[train['partition_column'] == partition]
     13       x_train, y_train = part.drop('target', axis=1), part['target']

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/frame.py:3807, in DataFrame.__getitem__(self, key)
   3805 if self.columns.nlevels > 1:
   3806     return self._getitem_multilevel(key)
-> 3807 indexer = self.columns.get_loc(key)
   3808 if is_integer(indexer):
   3809     indexer = [indexer]

File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/indexes/base.py:3804, in Index.get_loc(self, key, method, tolerance)
   3802     return self._engine.get_loc(casted_key)
   3803 except KeyError as err:
-> 3804     raise KeyError(key) from err
   3805 except TypeError:
   3806     # If we have a listlike key, _check_indexing_error will raise
   3807     #  InvalidIndexError. Otherwise we fall through and re-raise
   3808     #  the TypeError.
   3809     self._check_indexing_error(key)

KeyError: 'partition_column'
# The package is imported as `xplainable` (no `xp` alias exists in this
# file), so reference it by that name to avoid a NameError.
pp = xplainable.Preprocessor()
pp.preprocess(df)

# Reload the preprocessed data from the CSV written by the preprocessing step
# NOTE(review): the timestamped file name is hard-coded -- confirm it matches
# the file actually produced on this run.
df = pd.read_csv('preprocessed_1695571534.csv')
df.head()

# Train a model
model = xplainable.classifier(df)
Back to top