from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split
class Model:
    """Text-classification model: a TF-IDF vectorizer followed by a classifier.

    The instance stores the full data set (``X``, ``y``), the train/test
    split produced by :meth:`train`, the fitted pipeline and the
    predictions made on the test split.

    Typical use::

        model = Model(clf)
        model.set_X(texts)
        model.set_y(labels)
        model.train()
        accuracy, report = model.count_accuracy()
    """

    def __init__(self, clf, *, test_size=0.3, random_state=42):
        """Initialize the model.

        Args:
            clf: Classifier used as the final step of the pipeline.
            test_size: Value forwarded to ``train_test_split`` as
                ``test_size``. Defaults to 0.3.
            random_state: Value forwarded to ``train_test_split`` as
                ``random_state``. Defaults to 42.
        """
        self.clf = clf
        self.pipeline = None
        self.X = None
        self.y = None
        self.test_size = test_size
        self.random_state = random_state
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        self.y_pred = None

    def set_clf(self, clf):
        """Set the classifier.

        A pipeline that was already built keeps referring to the previous
        classifier; call :meth:`build_pipeline` again to pick up the new one.
        """
        self.clf = clf

    def get_clf(self):
        """Return the classifier."""
        return self.clf

    def set_X_train(self, X_train):
        """Set the X training data."""
        self.X_train = X_train

    def get_X_train(self):
        """Return the X training data."""
        return self.X_train

    def set_y_train(self, y_train):
        """Set the y training data."""
        self.y_train = y_train

    def get_y_train(self):
        """Return the y training data."""
        return self.y_train

    def set_X_test(self, X_test):
        """Set the X test data."""
        self.X_test = X_test

    def get_X_test(self):
        """Return the X test data."""
        return self.X_test

    def set_y_test(self, y_test):
        """Set the y test data."""
        self.y_test = y_test

    def get_y_test(self):
        """Return the y test data."""
        return self.y_test

    def set_X(self, X):
        """Set the complete X data (split into train/test by ``train``)."""
        self.X = X

    def get_X(self):
        """Return the complete X data."""
        return self.X

    def set_y(self, y):
        """Set the complete y data (split into train/test by ``train``)."""
        self.y = y

    def get_y(self):
        """Return the complete y data."""
        return self.y

    def set_test_size(self, test_size):
        """Set the test size passed to ``train_test_split``."""
        self.test_size = test_size

    def get_test_size(self):
        """Return the test size passed to ``train_test_split``."""
        return self.test_size

    def get_pipeline(self):
        """Return the pipeline, or ``None`` if it has not been built yet."""
        return self.pipeline

    def set_random_state(self, random_state):
        """Set the random state passed to ``train_test_split``."""
        self.random_state = random_state

    def get_random_state(self):
        """Return the random state passed to ``train_test_split``."""
        return self.random_state

    def set_y_pred(self, y_pred):
        """Set the predictions for the test data."""
        self.y_pred = y_pred

    def get_y_pred(self):
        """Return the predictions for the test data."""
        return self.y_pred

    def build_pipeline(self):
        """Build the pipeline from a ``TfidfVectorizer`` and the classifier.

        The pipeline has two steps named ``"tfidf"`` and ``"classifier"``;
        any previously built pipeline is replaced.
        """
        self.pipeline = Pipeline(
            [("tfidf", TfidfVectorizer()), ("classifier", self.clf)]
        )

    def train(self):
        """Split the data, fit the pipeline and predict on the test split.

        ``X`` and ``y`` are split with ``train_test_split`` (stratified on
        ``y``) using the configured test size and random state. The
        pipeline is fitted on the training part and the predictions for the
        test part are stored, retrievable through :meth:`get_y_pred`.

        If :meth:`build_pipeline` has not been called yet, the pipeline is
        built here first.
        """
        # Build on demand so that forgetting build_pipeline() does not end in
        # an AttributeError on ``None.fit``.
        if self.pipeline is None:
            self.build_pipeline()

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X,
            self.y,
            test_size=self.test_size,
            stratify=self.y,
            random_state=self.random_state,
        )
        self.pipeline.fit(self.X_train, self.y_train)
        self.set_y_pred(self.pipeline.predict(self.X_test))

    def count_accuracy(self):
        """Evaluate the stored predictions against the test labels.

        Returns:
            A tuple ``(accuracy, report)`` holding the result of
            ``accuracy_score`` and the result of ``classification_report``,
            both computed from the test labels and the predictions.

        Raises:
            ValueError: If the test labels or the predictions are not
                available yet (``train`` has not been run and they were not
                set explicitly).
        """
        y_true = self.get_y_test()
        y_pred = self.get_y_pred()
        # Fail with an actionable message instead of passing None into the
        # metric functions.
        if y_true is None or y_pred is None:
            raise ValueError(
                "No predictions to evaluate: call train() (or set y_test and "
                "y_pred) before count_accuracy()."
            )
        return (
            accuracy_score(y_true, y_pred),
            classification_report(y_true, y_pred),
        )