Skip to content

Commit 36f7efb

Browse files
committed
[WIP] Drop the deprecated binary format.
Remove. Basic model test. Cleanup. cli. adaptive test.
1 parent 5ff47c1 commit 36f7efb

File tree

17 files changed

+116
-485
lines changed

17 files changed

+116
-485
lines changed

include/xgboost/c_api.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,21 +1320,23 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *d
13201320
* @{
13211321
*/
13221322

1323-
/*!
1324-
* \brief Load model from existing file
1323+
/**
1324+
* @brief Load the model from an existing file
13251325
*
1326-
* \param handle handle
1327-
* \param fname File URI or file name. The string must be UTF-8 encoded.
1328-
* \return 0 when success, -1 when failure happens
1326+
* @param handle handle
1327+
* @param fname File name. The string must be UTF-8 encoded.
1328+
*
1329+
* @return 0 when success, -1 when failure happens
13291330
*/
13301331
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
13311332
const char *fname);
1332-
/*!
1333-
* \brief Save model into existing file
1333+
/**
1334+
* @brief Save the model into an existing file
13341335
*
1335-
* \param handle handle
1336-
* \param fname File URI or file name. The string must be UTF-8 encoded.
1337-
* \return 0 when success, -1 when failure happens
1336+
* @param handle handle
1337+
* @param fname File name. The string must be UTF-8 encoded.
1338+
*
1339+
* @return 0 when success, -1 when failure happens
13381340
*/
13391341
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,
13401342
const char *fname);

include/xgboost/gbm.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,7 @@ class GradientBooster : public Model, public Configurable {
4747
* @param cfg configurations on both training and model parameters.
4848
*/
4949
virtual void Configure(Args const& cfg) = 0;
50-
/*!
51-
* \brief load model from stream
52-
* \param fi input stream.
53-
*/
54-
virtual void Load(dmlc::Stream* fi) = 0;
55-
/*!
56-
* \brief save model to stream.
57-
* \param fo output stream
58-
*/
59-
virtual void Save(dmlc::Stream* fo) const = 0;
50+
6051
/**
6152
* \brief Slice a model using boosting index. The slice m:n indicates taking all trees
6253
* that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1).

include/xgboost/learner.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2015-2023 by XGBoost Contributors
2+
* Copyright 2015-2025, XGBoost Contributors
33
* \file learner.h
44
* \brief Learner interface that integrates objective, gbm and evaluation together.
55
* This is the user facing XGBoost training module.
@@ -151,9 +151,6 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
151151
void LoadModel(Json const& in) override = 0;
152152
void SaveModel(Json* out) const override = 0;
153153

154-
virtual void LoadModel(dmlc::Stream* fi) = 0;
155-
virtual void SaveModel(dmlc::Stream* fo) const = 0;
156-
157154
/*!
158155
* \brief Set multiple parameters at once.
159156
*

include/xgboost/model.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
1-
/*!
2-
* Copyright (c) 2019 by Contributors
3-
* \file model.h
4-
* \brief Defines the abstract interface for different components in XGBoost.
1+
/**
2+
* Copyright 2019-2025, XGBoost Contributors
3+
*
4+
* @file model.h
5+
* @brief Defines the abstract interface for different components in XGBoost.
56
*/
67
#ifndef XGBOOST_MODEL_H_
78
#define XGBOOST_MODEL_H_
89

9-
namespace dmlc {
10-
class Stream;
11-
} // namespace dmlc
12-
1310
namespace xgboost {
1411

1512
class Json;

include/xgboost/tree_model.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -365,17 +365,6 @@ class RegTree : public Model {
365365
return stats_[nid];
366366
}
367367

368-
/*!
369-
* \brief load model from stream
370-
* \param fi input stream
371-
*/
372-
void Load(dmlc::Stream* fi);
373-
/*!
374-
* \brief save model to stream
375-
* \param fo output stream
376-
*/
377-
void Save(dmlc::Stream* fo) const;
378-
379368
void LoadModel(Json const& in) override;
380369
void SaveModel(Json* out) const override;
381370

python-package/xgboost/testing/updater.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -704,12 +704,6 @@ def get_score(config: Dict) -> float:
704704

705705
assert get_score(config_0) == get_score(config_1)
706706

707-
with pytest.warns(Warning, match="Model format is default to UBJSON"):
708-
raw_booster = booster_1.save_raw(raw_format="deprecated")
709-
booster_2 = xgb.Booster(model_file=raw_booster)
710-
config_2 = json.loads(booster_2.save_config())
711-
assert get_score(config_1) == get_score(config_2)
712-
713707
raw_booster = booster_1.save_raw(raw_format="ubj")
714708
booster_2 = xgb.Booster(model_file=raw_booster)
715709
config_2 = json.loads(booster_2.save_config())

src/c_api/c_api.cc

Lines changed: 47 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,7 +1400,38 @@ XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *,
14001400
}
14011401
#endif // !defined(XGBOOST_USE_CUDA)
14021402

1403-
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
1403+
namespace {
1404+
template <typename Buffer, typename Iter = typename Buffer::const_iterator>
1405+
Json DispatchModelType(Buffer const &buffer, StringView ext, bool warn) {
1406+
auto first_non_space = [&](Iter beg, Iter end) {
1407+
for (auto i = beg; i != end; ++i) {
1408+
if (!std::isspace(*i)) {
1409+
return i;
1410+
}
1411+
}
1412+
return end;
1413+
};
1414+
1415+
Json model;
1416+
auto it = first_non_space(buffer.cbegin() + 1, buffer.cend());
1417+
if (it != buffer.cend() && *it == '"') {
1418+
if (warn) {
1419+
LOG(WARNING) << "Unknown file format: `" << ext << "`. Using JSON as a guess.";
1420+
}
1421+
model = Json::Load(StringView{buffer.data(), buffer.size()});
1422+
} else if (it != buffer.cend() && std::isalpha(*it)) {
1423+
if (warn) {
1424+
LOG(WARNING) << "Unknown file format: `" << ext << "`. Using UBJ as a guess.";
1425+
}
1426+
model = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);
1427+
} else {
1428+
LOG(FATAL) << "Invalid model format";
1429+
}
1430+
return model;
1431+
}
1432+
} // namespace
1433+
1434+
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {
14041435
API_BEGIN();
14051436
CHECK_HANDLE();
14061437
xgboost_CHECK_C_ARG_PTR(fname);
@@ -1410,28 +1441,23 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
14101441
CHECK_EQ(str[0], '{');
14111442
return str;
14121443
};
1413-
if (common::FileExtension(fname) == "json") {
1444+
auto ext = common::FileExtension(fname);
1445+
if (ext == "json") {
14141446
auto buffer = read_file();
14151447
Json in{Json::Load(StringView{buffer.data(), buffer.size()})};
1416-
static_cast<Learner*>(handle)->LoadModel(in);
1417-
} else if (common::FileExtension(fname) == "ubj") {
1448+
static_cast<Learner *>(handle)->LoadModel(in);
1449+
} else if (ext == "ubj") {
14181450
auto buffer = read_file();
14191451
Json in = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);
14201452
static_cast<Learner *>(handle)->LoadModel(in);
14211453
} else {
1422-
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r"));
1423-
static_cast<Learner*>(handle)->LoadModel(fi.get());
1454+
auto buffer = read_file();
1455+
auto in = DispatchModelType(buffer, ext, true);
1456+
static_cast<Learner *>(handle)->LoadModel(in);
14241457
}
14251458
API_END();
14261459
}
14271460

1428-
namespace {
1429-
void WarnOldModel() {
1430-
LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
1431-
"`ubj`. Model format is default to UBJSON in XGBoost 2.1 if not specified.";
1432-
}
1433-
} // anonymous namespace
1434-
14351461
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
14361462
API_BEGIN();
14371463
CHECK_HANDLE();
@@ -1447,17 +1473,14 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
14471473
Json::Dump(out, &str, mode);
14481474
fo->Write(str.data(), str.size());
14491475
};
1450-
if (common::FileExtension(fname) == "json") {
1476+
auto ext = common::FileExtension(fname);
1477+
if (ext == "json") {
14511478
save_json(std::ios::out);
1452-
} else if (common::FileExtension(fname) == "ubj") {
1479+
} else if (ext == "ubj") {
14531480
save_json(std::ios::binary);
1454-
} else if (common::FileExtension(fname) == "deprecated") {
1455-
WarnOldModel();
1456-
auto *bst = static_cast<Learner *>(handle);
1457-
bst->SaveModel(fo.get());
14581481
} else {
14591482
LOG(WARNING) << "Saving model in the UBJSON format as default. You can use file extension:"
1460-
" `json`, `ubj` or `deprecated` to choose between formats.";
1483+
" `json` or `ubj` to choose between formats.";
14611484
save_json(std::ios::binary);
14621485
}
14631486
API_END();
@@ -1468,9 +1491,11 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf,
14681491
API_BEGIN();
14691492
CHECK_HANDLE();
14701493
xgboost_CHECK_C_ARG_PTR(buf);
1471-
1494+
auto buffer = common::Span<char const>{static_cast<char const *>(buf), len};
1495+
// Don't warn, we have to guess the format with buffer input.
1496+
auto in = DispatchModelType(buffer, "", false);
14721497
common::MemoryFixSizeBuffer fs((void *)buf, len); // NOLINT(*)
1473-
static_cast<Learner *>(handle)->LoadModel(&fs);
1498+
static_cast<Learner *>(handle)->LoadModel(in);
14741499
API_END();
14751500
}
14761501

@@ -1503,15 +1528,6 @@ XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *json_co
15031528
save_json(std::ios::out);
15041529
} else if (format == "ubj") {
15051530
save_json(std::ios::binary);
1506-
} else if (format == "deprecated") {
1507-
WarnOldModel();
1508-
auto &raw_str = learner->GetThreadLocal().ret_str;
1509-
raw_str.clear();
1510-
common::MemoryBufferStream fo(&raw_str);
1511-
learner->SaveModel(&fo);
1512-
1513-
*out_dptr = dmlc::BeginPtr(raw_str);
1514-
*out_len = static_cast<xgboost::bst_ulong>(raw_str.size());
15151531
} else {
15161532
LOG(FATAL) << "Unknown format: `" << format << "`";
15171533
}

src/cli_main.cc

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -329,29 +329,45 @@ class CLI {
329329
}
330330

331331
void LoadModel(std::string const& path, Learner* learner) const {
332-
if (common::FileExtension(path) == "json") {
333-
auto buffer = common::LoadSequentialFile(path);
334-
CHECK_GT(buffer.size(), 2);
335-
CHECK_EQ(buffer[0], '{');
336-
Json in{Json::Load({buffer.data(), buffer.size()})};
332+
auto ext = common::FileExtension(path);
333+
auto read_file = [&]() {
334+
auto str = common::LoadSequentialFile(path);
335+
CHECK_GE(str.size(), 3); // "{}\0"
336+
CHECK_EQ(str[0], '{');
337+
return str;
338+
};
339+
340+
if (ext == "json") {
341+
auto buffer = read_file();
342+
Json in{Json::Load(StringView{buffer.data(), buffer.size()})};
343+
learner->LoadModel(in);
344+
} else if (ext == "ubj") {
345+
auto buffer = read_file();
346+
Json in = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);
337347
learner->LoadModel(in);
338348
} else {
339-
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(path.c_str(), "r"));
340-
learner->LoadModel(fi.get());
349+
LOG(FATAL) << "Unknown model format:" << path << ", expecting either json or ubj.";
341350
}
342351
}
343352

344353
void SaveModel(std::string const& path, Learner* learner) const {
345354
learner->Configure();
346355
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(path.c_str(), "w"));
347-
if (common::FileExtension(path) == "json") {
356+
auto ext = common::FileExtension(path);
357+
auto save_json = [&](std::ios::openmode mode) {
348358
Json out{Object()};
349359
learner->SaveModel(&out);
350-
std::string str;
351-
Json::Dump(out, &str);
352-
fo->Write(str.c_str(), str.size());
360+
std::vector<char> str;
361+
Json::Dump(out, &str, mode);
362+
fo->Write(str.data(), str.size());
363+
};
364+
365+
if (ext == "json") {
366+
save_json(std::ios::out);
367+
} else if (ext == "ubj") {
368+
save_json(std::ios::binary);
353369
} else {
354-
learner->SaveModel(fo.get());
370+
LOG(FATAL) << "Unknown model format:" << path << ", expecting either json or ubj.";
355371
}
356372
}
357373

src/gbm/gblinear.cc

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,6 @@ class GBLinear : public GradientBooster {
9292

9393
bool ModelFitted() const override { return BoostedRounds() != 0; }
9494

95-
void Load(dmlc::Stream*) override {
96-
LOG(FATAL) << "The deprecated binary model has been removed";
97-
}
98-
void Save(dmlc::Stream*) const override {
99-
LOG(FATAL) << "The deprecated binary model has been removed";
100-
}
101-
10295
void SaveModel(Json* p_out) const override {
10396
auto& out = *p_out;
10497
out["name"] = String{"gblinear"};

src/gbm/gblinear_model.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
/*!
2-
* Copyright 2019-2022 by Contributors
1+
/**
2+
* Copyright 2019-2025, XGBoost Contributors
33
*/
44
#include <algorithm>
55
#include <utility>
6-
#include <limits>
76
#include "xgboost/json.h"
87
#include "gblinear_model.h"
98

0 commit comments

Comments
 (0)