forked from TheAlgorithms/Python
- Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxgboost_regressor.py
66 lines (56 loc) · 2.23 KB
/
xgboost_regressor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# XGBoost Regressor Example
importnumpyasnp
fromsklearn.datasetsimportfetch_california_housing
fromsklearn.metricsimportmean_absolute_error, mean_squared_error
fromsklearn.model_selectionimporttrain_test_split
fromxgboostimportXGBRegressor
defdata_handling(data: dict) ->tuple:
# Split dataset into features and target. Data is features.
"""
>>> data_handling((
... {'data':'[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]'
... ,'target':([4.526])}))
('[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]', [4.526])
"""
return (data["data"], data["target"])
defxgboost(
features: np.ndarray, target: np.ndarray, test_features: np.ndarray
) ->np.ndarray:
"""
>>> xgboost(np.array([[ 2.3571 , 52. , 6.00813008, 1.06775068,
... 907. , 2.45799458, 40.58 , -124.26]]),np.array([1.114]),
... np.array([[1.97840000e+00, 3.70000000e+01, 4.98858447e+00, 1.03881279e+00,
... 1.14300000e+03, 2.60958904e+00, 3.67800000e+01, -1.19780000e+02]]))
array([[1.1139996]], dtype=float32)
"""
xgb=XGBRegressor(
verbosity=0, random_state=42, tree_method="exact", base_score=0.5
)
xgb.fit(features, target)
# Predict target for test data
predictions=xgb.predict(test_features)
predictions=predictions.reshape(len(predictions), 1)
returnpredictions
defmain() ->None:
"""
The URL for this algorithm
https://xgboost.readthedocs.io/en/stable/
California house price dataset is used to demonstrate the algorithm.
Expected error values:
Mean Absolute Error: 0.30957163379906033
Mean Square Error: 0.22611560196662744
"""
# Load California house price dataset
california=fetch_california_housing()
data, target=data_handling(california)
x_train, x_test, y_train, y_test=train_test_split(
data, target, test_size=0.25, random_state=1
)
predictions=xgboost(x_train, y_train, x_test)
# Error printing
print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}")
if__name__=="__main__":
importdoctest
doctest.testmod(verbose=True)
main()