import xplainable as xplainable
import pandas as pd
from sklearn.model_selection import train_test_split
from IPython.display import HTML
from pathlib import Path
Xplainable
Xplainable
from nbdevAuto.functions import *
import nbdevAuto.functions
# Kaggle dataset identifier; also used as the local data folder name.
name = 'titanic'
# Local directory the CSV files are read from further down.
path = Path(f'Data/{name}')
# Fetch the dataset via the nbdevAuto helper.
# NOTE(review): the helper is called with `name` only; presumably it writes
# into `Data/<name>` by default -- confirm against nbdevAuto.functions.
kaggle_download(name=name)
file exists
# Load the training split from the download directory and preview it.
df = pd.read_csv(path/'train.csv')
df.head()
| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
from xplainable.core.models import PartitionedClassifier
from xplainable.core.models import XClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
# Column names used below. The template placeholders 'partition_column' and
# 'target' are not columns of this frame and raised KeyError; both names
# chosen here appear in the df.head() preview. 'Pclass' is an example
# partition key -- any low-cardinality column can be substituted.
PARTITION_COLUMN = 'Pclass'
TARGET_COLUMN = 'Survived'

# Load your data
train, test = train_test_split(df, test_size=0.2)

# Create the partitioned model that will hold one classifier per partition
partitioned_model = PartitionedClassifier(partition_on=PARTITION_COLUMN)

# Iterate over the unique values in the partition column
for partition in train[PARTITION_COLUMN].unique():
    # Get the data for the partition
    part = train[train[PARTITION_COLUMN] == partition]
    x_train, y_train = part.drop(TARGET_COLUMN, axis=1), part[TARGET_COLUMN]

    # Fit the embedded model on this partition's rows only.
    # NOTE(review): the raw columns are passed through unprocessed --
    # confirm XClassifier accepts missing values and free-text columns.
    model = XClassifier()
    model.fit(x_train, y_train)

    # Add the model to the partitioned model
    partitioned_model.add_partition(model, partition)

# Prepare the test data
x_test, y_test = test.drop(TARGET_COLUMN, axis=1), test[TARGET_COLUMN]

# Predict on the partitioned model
y_pred = partitioned_model.predict(x_test)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/indexes/base.py:3802, in Index.get_loc(self, key, method, tolerance) 3801 try: -> 3802 return self._engine.get_loc(casted_key) 3803 except KeyError as err: File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/_libs/index.pyx:138, in pandas._libs.index.IndexEngine.get_loc() File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/_libs/index.pyx:165, in pandas._libs.index.IndexEngine.get_loc() File pandas/_libs/hashtable_class_helper.pxi:5745, in pandas._libs.hashtable.PyObjectHashTable.get_item() File pandas/_libs/hashtable_class_helper.pxi:5753, in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 'partition_column' The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) Cell In[7], line 10 7 partitioned_model = PartitionedClassifier(partition_on='partition_column') 9 # Iterate over the unique values in the partition column ---> 10 for partition in train['partition_column'].unique(): 11 # Get the data for the partition 12 part = train[train['partition_column'] == partition] 13 x_train, y_train = part.drop('target', axis=1), part['target'] File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/frame.py:3807, in DataFrame.__getitem__(self, key) 3805 if self.columns.nlevels > 1: 3806 return self._getitem_multilevel(key) -> 3807 indexer = self.columns.get_loc(key) 3808 if is_integer(indexer): 3809 indexer = [indexer] File ~/mambaforge/envs/cfast/lib/python3.11/site-packages/pandas/core/indexes/base.py:3804, in Index.get_loc(self, key, method, tolerance) 3802 return self._engine.get_loc(casted_key) 3803 except KeyError as err: -> 3804 raise KeyError(key) from err 3805 except TypeError: 3806 # If we have a listlike key, _check_indexing_error will raise 3807 # InvalidIndexError. 
Otherwise we fall through and re-raise 3808 # the TypeError. 3809 self._check_indexing_error(key) KeyError: 'partition_column'
# Build the xplainable preprocessor and run it on the current frame.
# The package is referenced by its full name because the visible imports
# bind it as `xplainable`; no `xp` alias is defined there.
pp = xplainable.Preprocessor()
pp.preprocess(df)
# Reload the preprocessed CSV into `df` and preview it.
df = pd.read_csv('preprocessed_1695571534.csv')
df.head()
# Train a model
# The package is referenced by its full name because the visible imports
# bind it as `xplainable`; no `xp` alias is defined there.
model = xplainable.classifier(df)