Skip to content

Commit 46cbb4a

Browse files
committed
[WIP] Drop the deprecated binary format.
1 parent 2247a9a commit 46cbb4a

File tree

12 files changed

+87
-417
lines changed

12 files changed

+87
-417
lines changed

include/xgboost/learner.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2015-2023 by XGBoost Contributors
2+
* Copyright 2015-2025, XGBoost Contributors
33
* \file learner.h
44
* \brief Learner interface that integrates objective, gbm and evaluation together.
55
* This is the user facing XGBoost training module.
@@ -151,9 +151,6 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
151151
void LoadModel(Json const& in) override = 0;
152152
void SaveModel(Json* out) const override = 0;
153153

154-
virtual void LoadModel(dmlc::Stream* fi) = 0;
155-
virtual void SaveModel(dmlc::Stream* fo) const = 0;
156-
157154
/*!
158155
* \brief Set multiple parameters at once.
159156
*

include/xgboost/model.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
1-
/*!
2-
* Copyright (c) 2019 by Contributors
3-
* \file model.h
4-
* \brief Defines the abstract interface for different components in XGBoost.
1+
/**
2+
* Copyright 2019-2025, XGBoost Contributors
3+
*
4+
* @file model.h
5+
* @brief Defines the abstract interface for different components in XGBoost.
56
*/
67
#ifndef XGBOOST_MODEL_H_
78
#define XGBOOST_MODEL_H_
89

9-
namespace dmlc {
10-
class Stream;
11-
} // namespace dmlc
12-
1310
namespace xgboost {
1411

1512
class Json;

include/xgboost/tree_model.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -365,17 +365,6 @@ class RegTree : public Model {
365365
return stats_[nid];
366366
}
367367

368-
/*!
369-
* \brief load model from stream
370-
* \param fi input stream
371-
*/
372-
void Load(dmlc::Stream* fi);
373-
/*!
374-
* \brief save model to stream
375-
* \param fo output stream
376-
*/
377-
void Save(dmlc::Stream* fo) const;
378-
379368
void LoadModel(Json const& in) override;
380369
void SaveModel(Json* out) const override;
381370

src/c_api/c_api.cc

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,7 +1368,38 @@ XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *,
13681368
}
13691369
#endif // !defined(XGBOOST_USE_CUDA)
13701370

1371-
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
1371+
namespace {
1372+
template <typename Buffer, typename Iter = typename Buffer::const_iterator>
1373+
Json DispatchModelType(Buffer const &buffer, StringView ext, bool warn) {
1374+
auto first_non_space = [&](Iter beg, Iter end) {
1375+
for (auto i = beg; i != end; ++i) {
1376+
if (!std::isspace(*i)) {
1377+
return i;
1378+
}
1379+
}
1380+
return end;
1381+
};
1382+
1383+
Json model;
1384+
auto it = first_non_space(buffer.cbegin() + 1, buffer.cend());
1385+
if (it != buffer.cend() && *it == '"') {
1386+
if (warn) {
1387+
LOG(WARNING) << "Unknown file format: `" << ext << "`. Using JSON as a guess.";
1388+
}
1389+
model = Json::Load(StringView{buffer.data(), buffer.size()});
1390+
} else if (it != buffer.cend() && std::isalpha(*it)) {
1391+
if (warn) {
1392+
LOG(WARNING) << "Unknown file format: `" << ext << "`. Using UBJ as a guess.";
1393+
}
1394+
model = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);
1395+
} else {
1396+
LOG(FATAL) << "Invalid model format";
1397+
}
1398+
return model;
1399+
}
1400+
} // namespace
1401+
1402+
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {
13721403
API_BEGIN();
13731404
CHECK_HANDLE();
13741405
xgboost_CHECK_C_ARG_PTR(fname);
@@ -1378,28 +1409,23 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
13781409
CHECK_EQ(str[0], '{');
13791410
return str;
13801411
};
1381-
if (common::FileExtension(fname) == "json") {
1412+
auto ext = common::FileExtension(fname);
1413+
if (ext == "json") {
13821414
auto buffer = read_file();
13831415
Json in{Json::Load(StringView{buffer.data(), buffer.size()})};
1384-
static_cast<Learner*>(handle)->LoadModel(in);
1385-
} else if (common::FileExtension(fname) == "ubj") {
1416+
static_cast<Learner *>(handle)->LoadModel(in);
1417+
} else if (ext == "ubj") {
13861418
auto buffer = read_file();
13871419
Json in = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);
13881420
static_cast<Learner *>(handle)->LoadModel(in);
13891421
} else {
1390-
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r"));
1391-
static_cast<Learner*>(handle)->LoadModel(fi.get());
1422+
auto buffer = read_file();
1423+
auto in = DispatchModelType(buffer, ext, true);
1424+
static_cast<Learner *>(handle)->LoadModel(in);
13921425
}
13931426
API_END();
13941427
}
13951428

1396-
namespace {
1397-
void WarnOldModel() {
1398-
LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
1399-
"`ubj`. Model format is default to UBJSON in XGBoost 2.1 if not specified.";
1400-
}
1401-
} // anonymous namespace
1402-
14031429
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
14041430
API_BEGIN();
14051431
CHECK_HANDLE();
@@ -1419,13 +1445,9 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
14191445
save_json(std::ios::out);
14201446
} else if (common::FileExtension(fname) == "ubj") {
14211447
save_json(std::ios::binary);
1422-
} else if (common::FileExtension(fname) == "deprecated") {
1423-
WarnOldModel();
1424-
auto *bst = static_cast<Learner *>(handle);
1425-
bst->SaveModel(fo.get());
14261448
} else {
14271449
LOG(WARNING) << "Saving model in the UBJSON format as default. You can use file extension:"
1428-
" `json`, `ubj` or `deprecated` to choose between formats.";
1450+
" `json` or `ubj` to choose between formats.";
14291451
save_json(std::ios::binary);
14301452
}
14311453
API_END();
@@ -1436,9 +1458,11 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf,
14361458
API_BEGIN();
14371459
CHECK_HANDLE();
14381460
xgboost_CHECK_C_ARG_PTR(buf);
1439-
1461+
auto buffer = common::Span<char const>{static_cast<char const *>(buf), len};
1462+
// Don't warn, we have to guess the format with buffer input.
1463+
auto in = DispatchModelType(buffer, "", false);
14401464
common::MemoryFixSizeBuffer fs((void *)buf, len); // NOLINT(*)
1441-
static_cast<Learner *>(handle)->LoadModel(&fs);
1465+
static_cast<Learner *>(handle)->LoadModel(in);
14421466
API_END();
14431467
}
14441468

@@ -1471,15 +1495,6 @@ XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *json_co
14711495
save_json(std::ios::out);
14721496
} else if (format == "ubj") {
14731497
save_json(std::ios::binary);
1474-
} else if (format == "deprecated") {
1475-
WarnOldModel();
1476-
auto &raw_str = learner->GetThreadLocal().ret_str;
1477-
raw_str.clear();
1478-
common::MemoryBufferStream fo(&raw_str);
1479-
learner->SaveModel(&fo);
1480-
1481-
*out_dptr = dmlc::BeginPtr(raw_str);
1482-
*out_len = static_cast<xgboost::bst_ulong>(raw_str.size());
14831498
} else {
14841499
LOG(FATAL) << "Unknown format: `" << format << "`";
14851500
}

src/gbm/gblinear.cc

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,6 @@ class GBLinear : public GradientBooster {
101101

102102
bool ModelFitted() const override { return BoostedRounds() != 0; }
103103

104-
void Load(dmlc::Stream* fi) override {
105-
model_.Load(fi);
106-
}
107-
void Save(dmlc::Stream* fo) const override {
108-
model_.Save(fo);
109-
}
110-
111104
void SaveModel(Json* p_out) const override {
112105
auto& out = *p_out;
113106
out["name"] = String{"gblinear"};

src/gbm/gblinear_model.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
/*!
2-
* Copyright 2019-2022 by Contributors
1+
/**
2+
* Copyright 2019-2025, XGBoost Contributors
33
*/
44
#include <algorithm>
55
#include <utility>
6-
#include <limits>
76
#include "xgboost/json.h"
87
#include "gblinear_model.h"
98

src/gbm/gblinear_model.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,6 @@ class GBLinearModel : public Model {
7171
void SaveModel(Json *p_out) const override;
7272
void LoadModel(Json const &in) override;
7373

74-
// save the model to file
75-
void Save(dmlc::Stream *fo) const {
76-
fo->Write(&param_, sizeof(param_));
77-
fo->Write(weight);
78-
}
79-
// load model from file
80-
void Load(dmlc::Stream *fi) {
81-
CHECK_EQ(fi->Read(&param_, sizeof(param_)), sizeof(param_));
82-
fi->Read(&weight);
83-
}
84-
8574
// model bias
8675
inline bst_float *Bias() {
8776
return &weight[learner_model_param->num_feature *

src/gbm/gbtree.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,6 @@ class GBTree : public GradientBooster {
188188

189189
[[nodiscard]] GBTreeTrainParam const& GetTrainParam() const { return tparam_; }
190190

191-
void Load(dmlc::Stream* fi) override { model_.Load(fi); }
192-
void Save(dmlc::Stream* fo) const override {
193-
model_.Save(fo);
194-
}
195-
196191
void LoadConfig(Json const& in) override;
197192
void SaveConfig(Json* p_out) const override;
198193

src/gbm/gbtree_model.cc

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -50,62 +50,6 @@ void Validate(GBTreeModel const& model) {
5050
}
5151
} // namespace
5252

53-
void GBTreeModel::Save(dmlc::Stream* fo) const {
54-
CHECK_EQ(param.num_trees, static_cast<int32_t>(trees.size()));
55-
56-
if (DMLC_IO_NO_ENDIAN_SWAP) {
57-
fo->Write(&param, sizeof(param));
58-
} else {
59-
auto x = param.ByteSwap();
60-
fo->Write(&x, sizeof(x));
61-
}
62-
for (const auto & tree : trees) {
63-
tree->Save(fo);
64-
}
65-
if (tree_info.size() != 0) {
66-
if (DMLC_IO_NO_ENDIAN_SWAP) {
67-
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int32_t) * tree_info.size());
68-
} else {
69-
for (const auto& e : tree_info) {
70-
auto x = e;
71-
dmlc::ByteSwap(&x, sizeof(x), 1);
72-
fo->Write(&x, sizeof(x));
73-
}
74-
}
75-
}
76-
}
77-
78-
void GBTreeModel::Load(dmlc::Stream* fi) {
79-
CHECK_EQ(fi->Read(&param, sizeof(param)), sizeof(param))
80-
<< "GBTree: invalid model file";
81-
if (!DMLC_IO_NO_ENDIAN_SWAP) {
82-
param = param.ByteSwap();
83-
}
84-
trees.clear();
85-
trees_to_update.clear();
86-
for (int32_t i = 0; i < param.num_trees; ++i) {
87-
std::unique_ptr<RegTree> ptr(new RegTree());
88-
ptr->Load(fi);
89-
trees.push_back(std::move(ptr));
90-
}
91-
tree_info.resize(param.num_trees);
92-
if (param.num_trees != 0) {
93-
if (DMLC_IO_NO_ENDIAN_SWAP) {
94-
CHECK_EQ(
95-
fi->Read(dmlc::BeginPtr(tree_info), sizeof(int32_t) * param.num_trees),
96-
sizeof(int32_t) * param.num_trees);
97-
} else {
98-
for (auto& info : tree_info) {
99-
CHECK_EQ(fi->Read(&info, sizeof(int32_t)), sizeof(int32_t));
100-
dmlc::ByteSwap(&info, sizeof(info), 1);
101-
}
102-
}
103-
}
104-
105-
MakeIndptr(this);
106-
Validate(*this);
107-
}
108-
10953
void GBTreeModel::SaveModel(Json* p_out) const {
11054
auto& out = *p_out;
11155
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));

src/gbm/gbtree_model.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,6 @@ struct GBTreeModel : public Model {
106106
}
107107
}
108108

109-
void Load(dmlc::Stream* fi);
110-
void Save(dmlc::Stream* fo) const;
111-
112109
void SaveModel(Json* p_out) const override;
113110
void LoadModel(Json const& p_out) override;
114111

0 commit comments

Comments
 (0)