7
\$\begingroup\$

Motivation: Partially for fun / learning, but also so I can roll out a custom JSON-like file format for a fighting game I am writing. There are two caveats: you cannot have repeated keys (requires std::unordered_multimap) and the keys are not in order (requires insertion order std::vector on the side, or some external boost lib probably). I am mainly looking for critique on my level of comments and ability to read my code, but everything else I am of course welcome to hear.

#pragma once #include <list> #include <map> #include <string> #include <variant> #include <vector> #include <fstream> #include <iostream> #include <sstream> namespace json { class Value; // six json data types using null_t = std::nullptr_t; using bool_t = bool; using number_t = std::double_t; using string_t = std::string; using array_t = std::vector<Value>; using object_t = std::map<string_t, Value>; using aggregate_t = std::variant< null_t, bool_t, number_t, string_t, array_t, object_t>; class Value : protected aggregate_t { public: using aggregate_t::variant; // removes spurious E0291 Value() = default; // converts int into double rather than bool Value(int integer) : aggregate_t(static_cast<double>(integer)) {} // converts c_string (pointer) into string rather than bool Value(const char* c_string) : aggregate_t(std::string(c_string)) {} public: auto operator[](const string_t& key) -> Value& { // transform into object if null if (std::get_if<null_t>(this)) *this = object_t(); return std::get<object_t>(*this)[key]; } auto operator[](std::size_t key) -> Value& { // transform into array if null if (std::get_if<null_t>(this)) *this = array_t(); if (key >= std::get<array_t>(*this).size()) std::get<array_t>(*this).resize(key + 1); return std::get<array_t>(*this)[key]; } auto save(std::ostream& stream, std::string prefix = "") -> std::ostream& { static const std::string SPACING = " "; // "\t"; // " "; // depending on the type, write to correct value with format to stream std::visit([&stream, &prefix](auto&& value) { using namespace std; using T = decay_t<decltype(value)>; if constexpr (is_same_v<T, nullptr_t>) stream << "null"; if constexpr (is_same_v<T, bool_t>) stream << (value ? "true" : "false"); else if constexpr (is_same_v<T, double_t>) stream << value; else if constexpr (is_same_v<T, string>) stream << '"' << value << '"'; else if constexpr (is_same_v<T, array_t>) { stream << "[\n"; auto [indent, remaining] = make_tuple(prefix + SPACING, value.size()); // for every json value, indent and print to stream for (auto& json : value) json.save(stream << indent, indent) // if jsons remaining (not last), append comma << (--remaining ? ",\n" : "\n"); stream << prefix << "]"; } else if constexpr (is_same_v<T, object_t>) { stream << "{\n"; auto [indent, remaining] = make_tuple(prefix + SPACING, value.size()); // for every json value, indent with key and print to stream for (auto& [key, json] : value) json.save(stream << indent << '"' << key << "\" : ", indent) // if jsons remaining (not last), append comma << (--remaining ? ",\n" : "\n"); stream << prefix << "}"; } }, *static_cast<aggregate_t*>(this)); return stream; } auto load(std::istream& stream) -> std::istream& { using namespace std; switch ((stream >> ws).peek()) { case '"': { // get word surrounded by " stringbuf buffer; stream.ignore(1) .get(buffer, '"') .ignore(1); *this = buffer.str(); } break; case '[': { array_t array; for (stream.ignore(1); (stream >> ws).peek() != ']';) // load child json and consume comma if available if ((array.emplace_back().load(stream) >> ws).peek() == ',') stream.ignore(1); stream.ignore(1); *this = move(array); } break; case '{': { object_t object; for (stream.ignore(1); (stream >> ws).peek() != '}';) { // get word surrounded by " stringbuf buffer; stream.ignore(numeric_limits<streamsize>::max(), '"') .get(buffer, '"') .ignore(numeric_limits<streamsize>::max(), ':'); // load child json and consume comma if available if ((object[buffer.str()].load(stream) >> ws).peek() == ',') stream.ignore(1); } stream.ignore(1); *this = move(object); } break; default: { if (isdigit(stream.peek()) || stream.peek() == '.') { double_t number; stream >> number; *this = number; } else if (isalpha(stream.peek())) { // get alphabetic word string word; for (; isalpha(stream.peek()); stream.ignore()) word.push_back(stream.peek()); // set value to look-up table's value static auto keyword_lut = map<string_view, Value>{ {"true", true}, {"false", false}, {"null", nullptr}}; *this = keyword_lut[word]; } else *this = nullptr; } break; } return stream; } auto save_to_path(std::string_view file_path) -> void { auto file = std::ofstream(std::string(file_path)); save(file); } auto load_from_path(std::string_view file_path) -> void { auto file = std::ifstream(std::string(file_path)); load(file); } static void test() { std::stringstream ss; { json::Value value; auto& employee = value["employee"]; employee["name"] = "bob"; employee["age"] = 21; employee["friends"][0] = "alice"; employee["friends"][1] = "billy"; employee["weight"] = 140.0; value.save(ss); } std::cout << ss.str() << "\n\n"; { auto example = std::stringstream(R"( { "firstName": "John", "lastName": "Smith", "isAlive": true, "age": 27, "address": { "streetAddress": "21 2nd Street", "city": "New York", "state": "NY", "postalCode": "10021-3100" }, "phoneNumbers": [ { "type": "home", "number": "212 555-1234" }, { "type": "office", "number": "646 555-4567" }, { "type": "mobile", "number": "123 456-7890" } ], "children": [], "spouse": null })"); json::Value value; value.load(example); ss.clear(); value.save(ss); } std::cout << ss.str() << "\n\n"; } }; } int main() { json::Value::test(); return getchar(); } 
\$\endgroup\$

    1 Answer 1

    3
    \$\begingroup\$

    Let's go through the code and see what can be improved.


    #pragma once 

    This shouldn't be in a non-header.


    #include <list> #include <map> #include <string> #include <variant> #include <vector> #include <fstream> #include <iostream> #include <sstream> 

    Sort the include directives in alphabetical order.


    namespace json { class Value; // six json data types using null_t = std::nullptr_t; using bool_t = bool; using number_t = std::double_t; using string_t = std::string; using array_t = std::vector<Value>; using object_t = std::map<string_t, Value>; using aggregate_t = std::variant< null_t, bool_t, number_t, string_t, array_t, object_t>; 

    json is a very common name used by many people, leading to name clashes. Think of a more unique name. Maybe unicorn5838::json?

    I don't see a reason to use std::nullptr_t for null_t. std::nullptr_t is a null pointer literal and can implicitly convert to pointers. Is that plausible? You can use std::monostate instead, or just struct null_t { };.

    You are mixing std::double_t and double. My advice is to just use double. Also, consistently use number_t after the alias declaration.


    class Value : protected aggregate_t { public: using aggregate_t::variant; // removes spurious E0291 Value() = default; // converts int into double rather than bool Value(int integer) : aggregate_t(static_cast<double>(integer)) {} // converts c_string (pointer) into string rather than bool Value(const char* c_string) : aggregate_t(std::string(c_string)) {} 

    Hmm ... Protected inheritance? Why do you need it in this case? (You can address this question with a comment if you have a good reason.)

    Inheriting the constructors of std::variant doesn't seem to be a good choice here. I see your effort in fixing the problems, but just providing your own constructors seems easier.


    public: auto operator[](const string_t& key) -> Value& { // transform into object if null if (std::get_if<null_t>(this)) *this = object_t(); return std::get<object_t>(*this)[key]; } auto operator[](std::size_t key) -> Value& { // transform into array if null if (std::get_if<null_t>(this)) *this = array_t(); if (key >= std::get<array_t>(*this).size()) std::get<array_t>(*this).resize(key + 1); return std::get<array_t>(*this)[key]; } 

    Don't use multiple public: labels.

    Do not use the trailing return type syntax unless necessary. (Yeah, I know some people advocate always using a trailing return type, but it arguably makes the code more verbose.)

    Your operator[] automatically constructs the element if not existent, much like map::operator[] but not vector::operator[]. I'm not sure whether this behavior is intuitive enough to justify itself, but anyway ...

    *this = object_t(); should be emplace<object_t>(); to prevent an unnecessary move construction.

    You do std::get<array_t>(*this) three times, and the complex code for getting the value will be run three times. Instead, use a reference:

    auto& arr = std::get<array_t>(*this); if (key >= arr.size()) arr.resize(key + 1); return arr[key]; 

    Also note that key + 1 may overflow (well, probably not a real problem).

    auto save(std::ostream& stream, std::string prefix = "") -> std::ostream& { static const std::string SPACING = " "; // "\t"; // " "; // depending on the type, write to correct value with format to stream std::visit([&stream, &prefix](auto&& value) { using namespace std; using T = decay_t<decltype(value)>; if constexpr (is_same_v<T, nullptr_t>) stream << "null"; if constexpr (is_same_v<T, bool_t>) stream << (value ? "true" : "false"); else if constexpr (is_same_v<T, double_t>) stream << value; else if constexpr (is_same_v<T, string>) stream << '"' << value << '"'; else if constexpr (is_same_v<T, array_t>) { stream << "[\n"; auto [indent, remaining] = make_tuple(prefix + SPACING, value.size()); // for every json value, indent and print to stream for (auto& json : value) json.save(stream << indent, indent) // if jsons remaining (not last), append comma << (--remaining ? ",\n" : "\n"); stream << prefix << "]"; } else if constexpr (is_same_v<T, object_t>) { stream << "{\n"; auto [indent, remaining] = make_tuple(prefix + SPACING, value.size()); // for every json value, indent with key and print to stream for (auto& [key, json] : value) json.save(stream << indent << '"' << key << "\" : ", indent) // if jsons remaining (not last), append comma << (--remaining ? ",\n" : "\n"); stream << prefix << "}"; } }, *static_cast<aggregate_t*>(this)); return stream; } 

    Use null_t and string_t, not nullptr_t and string. Use const auto& instead of auto&& if you don't need the universal reference semantics. prefix should be std::string_view instead of by-value std::string. The spacing should also be an argument instead of hard coded.

    This is a very long function. The long if constexpr chain makes the code much less readable. Use overload resolution to break it down:

    // somewhere struct formatter { std::ostream& os; std::string_view prefix; std::string_view indent; void operator()(null_t) const; void operator()(bool_t) const; // etc. }; 

    then you can just do

    std::visit(formatter{os, prefix, indent}, static_cast<aggregate_t&>(*this)); 

    The string streaming should use std::quoted to properly handle escaping.

    Don't do this:

    auto [indent, remaining] = make_tuple(prefix + SPACING, value.size()); 

    It incurs a lot of overhead, both on performance and readability.


    auto load(std::istream& stream) -> std::istream& { using namespace std; switch ((stream >> ws).peek()) { case '"': { // get word surrounded by " stringbuf buffer; stream.ignore(1) .get(buffer, '"') .ignore(1); *this = buffer.str(); } break; case '[': { array_t array; for (stream.ignore(1); (stream >> ws).peek() != ']';) // load child json and consume comma if available if ((array.emplace_back().load(stream) >> ws).peek() == ',') stream.ignore(1); stream.ignore(1); *this = move(array); } break; case '{': { object_t object; for (stream.ignore(1); (stream >> ws).peek() != '}';) { // get word surrounded by " stringbuf buffer; stream.ignore(numeric_limits<streamsize>::max(), '"') .get(buffer, '"') .ignore(numeric_limits<streamsize>::max(), ':'); // load child json and consume comma if available if ((object[buffer.str()].load(stream) >> ws).peek() == ',') stream.ignore(1); } stream.ignore(1); *this = move(object); } break; default: { if (isdigit(stream.peek()) || stream.peek() == '.') { double_t number; stream >> number; *this = number; } else if (isalpha(stream.peek())) { // get alphabetic word string word; for (; isalpha(stream.peek()); stream.ignore()) word.push_back(stream.peek()); // set value to look-up table's value static auto keyword_lut = map<string_view, Value>{ {"true", true}, {"false", false}, {"null", nullptr}}; *this = keyword_lut[word]; } else *this = nullptr; } break; } return stream; } 

    Don't use stringbuf. It's a low level functionality. Use std::quoted instead:

    case '"': { std::string str; stream >> std::quoted(str); emplace<string_t>(str); break; } 

    .ignore(1) is slower than .get() without aggressive optimization.

    The table should be const, and .at (which looks up existing elements) should be used instead of [] (which creates new elements and cannot be used on const maps). Using a map for three strings is also an overkill and will introduce overhead.


    auto save_to_path(std::string_view file_path) -> void { auto file = std::ofstream(std::string(file_path)); save(file); } auto load_from_path(std::string_view file_path) -> void { auto file = std::ifstream(std::string(file_path)); load(file); } 

    Please don't "always use auto". I know it is suggested by Herb Sutter (right?), but it is really unidiomatic and distracting.

    \$\endgroup\$
    9
    • \$\begingroup\$I've gone ahead and made all the corrections except the follow (please refute me if I am wrong): #pragma once it is a header file, protected aggregate_t because a json IS-A variant, not HAS-A variant, multiple public: labels separates my ctors / API and trailing returns is my coding style (I would change depending on my org).\$\endgroup\$
      – Saxpy
      CommentedOct 8, 2019 at 19:18
    • \$\begingroup\$ah two more notes: I can't use std::string_view for prefix because I require append operations to tabify my json -> file operation, second I use a std::map lookup table even if it is overkill because its easy to read and I will be adding my own custom keywords for my fighting game.\$\endgroup\$
      – Saxpy
      CommentedOct 8, 2019 at 19:28
    • 1
      \$\begingroup\$@Saxpy "Is-a" should be indicated by private inheritance, not protected inheritance. Don't abuse multiple public: labels, which alter semantics, for separating declaration blocks; using a comment like // constructors instead is much clearer. Always trailing return is a, well, unpopular, and illogical style (coding styles exist to improve readability). string_view provides the operations you want. Keeping the map is understandable though, but at least make it const.\$\endgroup\$
      – L. F.
      CommentedOct 9, 2019 at 9:47
    • \$\begingroup\$I see, I'll go ahead and change it to private inheritance. Also I'll take note on the public: blocks into // constructors. I'd still like to keep trailing returns at this is my personal project and that's the style I've committed too and prefer on my own. string_view does not offer append operations though, how could I refactor this? The std::map will be const.\$\endgroup\$
      – Saxpy
      CommentedOct 9, 2019 at 16:55
    • 1
      \$\begingroup\$@Saxpy Oops, I didn’t realize that the string view ctor is explicit. Sorry, I’ll reconsider the recommendation\$\endgroup\$
      – L. F.
      CommentedOct 12, 2019 at 9:03

    Start asking to get answers

    Find the answer to your question by asking.

    Ask question

    Explore related questions

    See similar questions with these tags.