Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor fixes to gradient and hessian calculation #1

Merged
merged 12 commits into from
Jan 11, 2024
39 changes: 37 additions & 2 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@ name: Python application

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

permissions:
contents: read

jobs:
build:
ci:

runs-on: ubuntu-latest

Expand All @@ -39,3 +38,39 @@ jobs:
- name: Test with pytest
run: |
pytest
cd:
  # Publish only once the "ci" job has succeeded.
  needs: ci
  # Only run this job if new work is pushed to "main"
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
  # Set up operating system
  runs-on: ubuntu-latest

  # Define job steps
  steps:
    # The sources must be checked out before anything can be built;
    # checking out after the build would leave nothing to package.
    - uses: actions/checkout@v3
    - name: Set up Python 3.9
      uses: actions/setup-python@v2
      with:
        # Quoted so YAML never interprets the version as a float.
        python-version: "3.9"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build
    # Here we run build to create a wheel and a
    # .tar.gz source distribution.
    - name: Build package
      run: python -m build --sdist --wheel
    # Finally, we use a pre-defined action to publish
    # our package in place of twine.
    - name: Publish to TestPyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        user: __token__
        password: ${{ secrets.TEST_PYPI_API_TOKEN }}
        repository-url: https://test.pypi.org/legacy/
    # Smoke-test the upload by installing the package that was just published
    # (dependencies are resolved from the real PyPI index).
    - name: Test install from TestPyPI
      run: |
        pip install \
          --index-url https://test.pypi.org/simple/ \
          --extra-index-url https://pypi.org/simple \
          ordinalgbt
128 changes: 128 additions & 0 deletions notebooks/comparison_with_classifiers.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparing ordinal with usual classification"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook we use the sklearn diabetes dataset as a comparison between the LGBMOrdinal, LGBMClassifier, and Logistic regression models. We convert the continuous label to classes by binning it using quantiles.\n",
"\n",
"We then train and test the models several times with different train/test splits and evaluate their mean absolute deviation instead of accuracy. This metric penalises wrong predictions that are further apart from the true label more than those which are closer."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from lightgbm import LGBMClassifier\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"from ordinalgbt.lgb import LGBMOrdinal\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data = load_diabetes()\n",
"X = pd.DataFrame(data[\"data\"], columns = data[\"feature_names\"])\n",
"y = data[\"target\"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"nq = 10\n",
"thresholds = np.append(np.append(y.min()-1,np.quantile(y,np.arange(0,1,1/nq)[1:])),y.max()+1)\n",
"yq = pd.cut(x=y,bins=thresholds,right=True,labels=['q'+str(z+1) for z in range(nq)])\n",
"yord = yq.astype('category').codes\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" mdl MAE\n",
"0 LGBMOrdinal 2.0\n",
"1 SKlearn Multinomial 2.5\n",
"2 LGBMClassifier 2.1\n"
]
}
],
"source": [
"holder, coef = [], []\n",
"nsim = 10\n",
"for ii in range(nsim):\n",
" # Do a train/test split (80/20)\n",
" ytrain, ytest, Xtrain, Xtest = train_test_split(yord, X, stratify=yord,test_size=0.2,\n",
" random_state=ii)\n",
" # Ordinal model\n",
" mdl_ord = LGBMOrdinal()\n",
" mdl_ord.fit(Xtrain, ytrain)\n",
" # Multinomial LGBM model\n",
" mdl_class = LGBMClassifier()\n",
" mdl_class.fit(Xtrain, ytrain)\n",
" # Multinomial Regression model\n",
" mdl_multi = LogisticRegression(penalty='l2',solver='lbfgs',max_iter=1000)\n",
" mdl_multi.fit(Xtrain,ytrain)\n",
" # Make predictions\n",
" yhat_ord = mdl_ord.predict(Xtest)\n",
" yhat_multi = mdl_multi.predict(Xtest)\n",
" yhat_class = mdl_class.predict(Xtest)\n",
" # Get MAE\n",
" acc_class = np.abs(yhat_class - ytest).mean()\n",
" acc_multi = np.abs(yhat_multi - ytest).mean()\n",
" acc_ord = np.abs(yhat_ord - ytest).mean()\n",
" holder.append(pd.DataFrame({'ord':acc_ord,'multi':acc_multi,'class':acc_class},index=[ii]))\n",
"\n",
"df_mae = pd.concat(holder).mean(axis=0).reset_index().rename(columns={'index':'mdl',0:'MAE'})\n",
"di_lbls = {'ord':'LGBMOrdinal','multi':'SKlearn Multinomial','class':'LGBMClassifier'}\n",
"df_mae = df_mae.assign(mdl=lambda x: x.mdl.map(di_lbls))\n",
"print(np.round(df_mae,1))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.17"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
8 changes: 3 additions & 5 deletions ordinalgbt/lgb.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,7 @@ def __init__(
# self.threshold_interval = threshold_interval

def _initialise_theta(self):
return np.linspace(0, (self.n_classes - 2) * 2, self.n_classes - 1)

def _initialise_alpha(self):
return theta2alpha(np.linspace(0, (self.n_classes - 2) * 2, self.n_classes - 1))
return np.linspace(0, (self.n_classes - 2) * 1, self.n_classes - 1)

def _lgb_loss_factory(self):
self.theta = self._initialise_theta()
Expand Down Expand Up @@ -96,7 +93,8 @@ def _optimise_alpha(self, y_true, y_preds):
"""
loss = self._alpha_loss_factory(y_true, y_preds)
alpha = theta2alpha(self.theta)
self._alpha_optimisation_report = minimize(loss, alpha)
bounds = [(None,3.58)]*len(alpha)
self._alpha_optimisation_report = minimize(loss, alpha, bounds=bounds)
alpha = self._alpha_optimisation_report.x
self.theta = alpha2theta(alpha)

Expand Down
16 changes: 11 additions & 5 deletions ordinalgbt/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def dec_clip_y_pred(fun):
    """Decorate a loss function so that ``y_preds`` is clipped before use.

    The returned wrapper accepts **keyword-only** arguments ``y_true``,
    ``y_preds`` and ``theta``. It clips ``y_preds`` to the interval
    ``[max(theta) - 36, 700 + min(theta)]`` and then forwards all three
    arguments, by keyword, to ``fun``.

    Parameters
    ----------
    fun : callable
        Function accepting ``y_true``, ``y_preds`` and ``theta`` as keywords.

    Returns
    -------
    callable
        The wrapped function; its result is whatever ``fun`` returns.
    """

    @wraps(fun)
    def wrapped(*, y_true, y_preds, theta):
        # Both bounds are relative to the thresholds. A single clip is applied:
        # an additional fixed lower bound of -20 would override
        # ``max(theta) - 36`` whenever ``max(theta) < 16``.
        # NOTE(review): the constants 36 and 700 presumably keep the downstream
        # sigmoid/exp evaluations within float64 range - confirm.
        lower = max(theta) - 36
        upper = 700 + min(theta)
        y_preds = np.clip(y_preds, lower, upper)
        return fun(y_true=y_true, y_preds=y_preds, theta=theta)

    return wrapped
Expand Down Expand Up @@ -151,12 +151,12 @@ def probas_from_y_pred(y_preds, theta):
c_probas = stack_zeros_ones(s_array)

probas = c_probas[:, 1 : len(theta) + 2] - c_probas[:, 0 : len(theta) + 1]
probas = np.clip(
probas, a_min=np.finfo(float).eps, a_max=1 - 3 * np.finfo(float).eps
)
# probas = np.clip(
# probas, a_min=np.finfo(float).eps, a_max=1 - len(theta) * np.finfo(float).eps
# )
return probas


@dec_clip_y_pred
def ordinal_logistic_nll(y_true: np.ndarray, y_preds: np.ndarray, theta: np.ndarray):
"""Ordinal Negative log likelihood

Expand All @@ -180,6 +180,11 @@ def ordinal_logistic_nll(y_true: np.ndarray, y_preds: np.ndarray, theta: np.ndar
probas = probas_from_y_pred(y_preds, theta)
# probabilities associated with the correct label
label_probas = probas[np.arange(0, len(y_true)), y_true]
label_probas = np.clip(
label_probas,
a_min=np.finfo(float).eps,
a_max=1 - len(theta) * np.finfo(float).eps
)
# loss
return -np.sum(np.log(label_probas))

Expand Down Expand Up @@ -255,6 +260,7 @@ def hessian_ordinal_logistic_nll(
hessian = -(h_probas / probas - np.power(g_probas / probas, 2))[
np.arange(0, len(y_true)), y_true
]
# hessian[np.abs(hessian) <=np.finfo(float).eps] = -np.finfo(float).eps
return hessian


Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ install_requires=
[metadata]
name = ordinalgbt
description = A library to build Gradient boosted trees for ordinal labels
version = 0.1
version = 0.1.1
long_description = file:README.md
long_description_content_type = text/markdown
author = Adamos Spanashis
Expand Down
9 changes: 1 addition & 8 deletions tests/test_lgb.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,9 @@
def test_initialise_theta():
    """Initial thresholds for 5 classes are 0, 1, 2, 3 (unit spacing from 0)."""
    model = LGBMOrdinal()
    # n_classes is set directly so no fitting is required.
    model.n_classes = 5
    expected_theta = np.array([0., 1., 2., 3.])
    assert np.array_equal(model._initialise_theta(), expected_theta)

def test_initialise_alpha():
model = LGBMOrdinal()
model.n_classes = 5
expected_theta = np.array([0., 2., 4., 6.])
expected_alpha = theta2alpha(expected_theta)
assert np.array_equal(model._initialise_alpha(), expected_alpha)

def test_lgb_loss_factory():
model = LGBMOrdinal()
model.n_classes = 5
Expand Down
36 changes: 35 additions & 1 deletion tests/test_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_ordinal_logistic_nll():
expected_loss = -np.sum(np.log(
sigmoid(np.array([1,500,-3])) - sigmoid(np.array([-1,-2,-500]))
))
loss = ordinal_logistic_nll(y_true, y_preds, theta)
loss = ordinal_logistic_nll(y_true= y_true, y_preds= y_preds, theta= theta)
assert isinstance(loss, float)
assert loss == pytest.approx(expected_loss)

Expand All @@ -76,6 +76,19 @@ def test_gradient_ordinal_logistic_nll():
np.array([0, 0, 1]),
decimal=3)

def test_gradient_ordinal_logistic_nll_monotonic():
    """
    Testing at extreme values of y_pred where the resolution
    of floating point arithmetic might fail
    """
    y_preds = np.linspace(0, 150, 100)
    y_true = np.array([5] * 100)
    theta = np.arange(0, 18, 2)

    gradient = gradient_ordinal_logistic_nll(y_true, y_preds, theta)
    # Consecutive differences must never be negative. Ties are accepted, so the
    # property checked is "non-decreasing", not strict monotonicity.
    monotonic = np.diff(gradient) >= 0
    assert monotonic.all(), "Gradient is not monotonically non-decreasing"

def test_hessian_ordinal_logistic_nll():
y_preds = np.array([1.5, 15, -38])
y_true = np.array([1, 2, 0])
Expand All @@ -86,6 +99,27 @@ def test_hessian_ordinal_logistic_nll():
np.array([0.47, 0, 0]),
decimal=5)

def test_hessian_ordinal_logistic_nll_monotonic():
    """
    Check that the hessian rises to its maximum and then falls over a wide
    sweep of y_pred, including extreme values where the resolution of
    floating point arithmetic might fail
    """
    n_points = 100
    theta = np.arange(0, 18, 2)
    y_true = np.array([5] * n_points)
    y_preds = np.linspace(0, 150, n_points)

    hessian = hessian_ordinal_logistic_nll(y_true, y_preds, theta)

    # Predictions lying strictly between theta[4] and theta[5] are where the
    # hessian is expected to reach its maximum.
    expected_max_mask = np.logical_and(y_preds < theta[5], y_preds > theta[4])
    np.testing.assert_almost_equal(hessian[expected_max_mask], hessian.max())

    peak_start = np.where(expected_max_mask)[0][0]

    # Everything before the peak must be non-decreasing ...
    rising = hessian[:peak_start]
    assert (np.diff(rising) >= 0).all()

    # ... and everything from the peak onwards must be non-increasing.
    falling = hessian[peak_start:]
    assert (np.diff(falling) <= 0).all()



def test_lgb_ordinal_loss():
y_preds = np.array([1.5, 15, -38])
y_true = np.array([1, 2, 0])
Expand Down