Browse Source

Content Identifier (#10)

* Introduce CID
* Introduce multicodec type table
pull/12/head
Harrm 5 years ago
committed by GitHub
parent
commit
7d06491f61
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 50
      include/libp2p/multi/content_identifier.hpp
  2. 45
      include/libp2p/multi/content_identifier_codec.hpp
  3. 4
      include/libp2p/multi/hash_type.hpp
  4. 63
      include/libp2p/multi/multicodec_type.hpp
  5. 2
      include/libp2p/multi/multihash.hpp
  6. 3
      include/libp2p/multi/uvarint.hpp
  7. 10
      src/multi/CMakeLists.txt
  8. 40
      src/multi/content_identifier.cpp
  9. 103
      src/multi/content_identifier_codec.cpp
  10. 4
      src/multi/multihash.cpp
  11. 9
      src/multi/uvarint.cpp
  12. 4
      src/peer/peer_id.cpp
  13. 9
      test/libp2p/multi/CMakeLists.txt
  14. 131
      test/libp2p/multi/cid_test.cpp
  15. 4
      test/libp2p/multi/multihash_test.cpp

50
include/libp2p/multi/content_identifier.hpp

@ -0,0 +1,50 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef LIBP2P_CONTENT_IDENTIFIER_HPP
#define LIBP2P_CONTENT_IDENTIFIER_HPP
#include <vector>
#include <boost/operators.hpp>
#include <libp2p/multi/multicodec_type.hpp>
#include <libp2p/multi/multihash.hpp>
namespace libp2p::multi {
/**
* A CID is a self-describing content-addressed identifier. It uses
* cryptographic hashes to achieve content addressing. It uses several
* multiformats to achieve flexible self-description, namely multihash for
* hashes, multicodec for data content types, and multibase to encode the CID
* itself into strings. Concretely, it's a typed content address: a tuple of
* (content-type, content-address).
*
* @note multibase may be omitted in non text-based protocols and is generally
* needed only for CIDs serialized to a string, so it is not present in this
* structure
*/
struct ContentIdentifier: public boost::equality_comparable<ContentIdentifier> {
/// CID format versions representable by this structure
enum class Version { V0 = 0, V1 = 1 };
/**
 * Stores the three components as given; the constructor itself performs no
 * cross-validation between the version and the other two fields
 * @param version is the CID format version
 * @param content_type is the multicodec code of the addressed content
 * @param content_address is the multihash of the addressed content
 */
ContentIdentifier(Version version, MulticodecType::Code content_type,
Multihash content_address);
/**
 * Output layout:
 * "<base> - cidv<version> - <content type name> - <hash name>-<digest
 * bits>-<digest hex>"
 * @param base is a human-readable multibase prefix; it is copied into the
 * output as-is
 * @returns human readable representation of the CID
 */
std::string toPrettyString(const std::string &base);
/// Equal when version, content type and content address all match;
/// boost::equality_comparable derives operator!= from this operator
bool operator==(const ContentIdentifier &c) const;
/// CID format version
Version version;
/// multicodec code describing the type of the addressed content
MulticodecType::Code content_type;
/// multihash of the addressed content
Multihash content_address;
};
} // namespace libp2p::multi
#endif // LIBP2P_CONTENT_IDENTIFIER_HPP

45
include/libp2p/multi/content_identifier_codec.hpp

@ -0,0 +1,45 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef LIBP2P_CONTENT_IDENTIFIER_CODEC_HPP
#define LIBP2P_CONTENT_IDENTIFIER_CODEC_HPP
#include <libp2p/multi/content_identifier.hpp>
namespace libp2p::multi {
/**
* Serializes and deserializes CID to byte representation.
* To serialize it to a multibase encoded string, use MultibaseCodec
* @see MultibaseCodec
*/
class ContentIdentifierCodec {
public:
/// Reasons for encode() to reject a CID; every one of them is reported for
/// version 0 CIDs only
enum class EncodeError {
// content type of a V0 CID is not DAG_PB
INVALID_CONTENT_TYPE = 1,
// hash type of a V0 CID is not sha256
INVALID_HASH_TYPE,
// digest of a V0 CID is not 32 bytes long
INVALID_HASH_LENGTH
};
/// Reasons for decode() to reject a byte sequence
enum class DecodeError {
// no varint could be read at the position of the version
EMPTY_VERSION = 1,
// no varint could be read at the position of the multicodec code
EMPTY_MULTICODEC,
// the decoded version is zero
MALFORMED_VERSION,
// the decoded version is greater than 1
RESERVED_VERSION
};
/**
 * @param cid is the identifier to serialize
 * @return for V1: varint(version), varint(content type) and the multihash
 * bytes, in this order; for V0: the multihash bytes alone, or an EncodeError
 * when the CID does not satisfy the V0 restrictions
 */
static outcome::result<std::vector<uint8_t>> encode(
const ContentIdentifier &cid);
/**
 * @param bytes is a serialized CID; a 34 byte input starting with 0x12 0x20
 * is read as a V0 CID (a bare multihash), anything else as
 * varint(version), varint(content type), multihash
 * @return the decoded CID, a DecodeError, or the error reported by the
 * multihash parser
 */
static outcome::result<ContentIdentifier> decode(
gsl::span<uint8_t> bytes);
};
} // namespace libp2p::multi
OUTCOME_HPP_DECLARE_ERROR(libp2p::multi, ContentIdentifierCodec::EncodeError);
OUTCOME_HPP_DECLARE_ERROR(libp2p::multi, ContentIdentifierCodec::DecodeError);
#endif // LIBP2P_CONTENT_IDENTIFIER_CODEC_HPP

4
include/libp2p/multi/hash_type.hpp

@ -7,6 +7,10 @@
#define LIBP2P_HASH_TYPE_HPP
namespace libp2p::multi {
/// TODO(Harrm) FIL-14: Hash types are a part of multicodec table, it would be good to
/// move them there to avoid duplication and allow for extraction of
/// human-friendly name of a type from its code
/// @see MulticodecType
/// https://github.com/multiformats/js-multihash/blob/master/src/constants.js
enum HashType : uint64_t {
identity = 0x0,

63
include/libp2p/multi/multicodec_type.hpp

@ -0,0 +1,63 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef LIBP2P_MULTICODECTYPE_HPP
#define LIBP2P_MULTICODECTYPE_HPP
#include <cstdint>
#include <string>
#include <boost/optional.hpp>
namespace libp2p::multi {
/**
 * LibP2P uses "protocol tables" to agree upon the mapping from a multicodec
 * code to the protocol or format that the code stands for. These tables can
 * be application specific, though, like with other multiformats, there is a
 * globally agreed upon table with common protocols and formats.
 */
class MulticodecType {
 public:
  /// TODO(Harrm) add more codes
  ///
  /// The underlying type is fixed to uint64_t (same as HashType) so that any
  /// code read from a varint can be converted to Code, including codes that
  /// are not listed here yet. Without a fixed underlying type such a
  /// conversion is undefined for values outside of the enumerators' range.
  enum Code : uint64_t {
    IDENTITY = 0x00,
    SHA1 = 0x11,
    SHA2_256 = 0x12,
    SHA2_512 = 0x13,
    SHA3_512 = 0x14,
    SHA3_384 = 0x15,
    SHA3_256 = 0x16,
    SHA3_224 = 0x17,
    DAG_PB = 0x70
  };

  /**
   * @param code is a multicodec code
   * @return human-readable name of the code, or "unknown" when the code is
   * not one of the enumerators of Code
   */
  static std::string getName(Code code) {
    switch (code) {
      case Code::IDENTITY:
        // NOTE(review): the global multicodec table calls 0x00 "identity" and
        // gives the name "raw" to 0x55; kept as is because callers and tests
        // rely on this string - confirm the intent
        return "raw";
      case Code::SHA1:
        return "sha1";
      case Code::SHA2_256:
        return "sha2-256";
      case Code::SHA2_512:
        return "sha2-512";
      case Code::SHA3_224:
        return "sha3-224";
      case Code::SHA3_256:
        return "sha3-256";
      case Code::SHA3_384:
        return "sha3-384";
      case Code::SHA3_512:
        return "sha3-512";
      case Code::DAG_PB:
        return "dag-pb";
    }
    return "unknown";
  }
};
} // namespace libp2p::multi
#endif // LIBP2P_MULTICODECTYPE_HPP

2
include/libp2p/multi/multihash.hpp

@ -63,7 +63,7 @@ namespace libp2p::multi {
* @param b - the buffer with the multihash
* @return result with the multihash in case of success
*/
static outcome::result<Multihash> createFromBuffer(
static outcome::result<Multihash> createFromBytes(
gsl::span<const uint8_t> b);
/**

3
include/libp2p/multi/uvarint.hpp

@ -69,6 +69,9 @@ namespace libp2p::multi {
*/
UVarint &operator=(uint64_t n);
bool operator==(const UVarint& r) const;
bool operator!=(const UVarint& r) const;
/**
* @return the number of bytes currently stored in a varint
*/

10
src/multi/CMakeLists.txt

@ -32,3 +32,13 @@ target_link_libraries(p2p_multiaddress
p2p_converters
Boost::boost
)
# CID library: the ContentIdentifier structure and its binary codec
libp2p_add_library(p2p_cid
content_identifier.cpp
content_identifier_codec.cpp
)
# the codec is built on top of UVarint and Multihash
target_link_libraries(p2p_cid
p2p_uvarint
p2p_multihash
)

40
src/multi/content_identifier.cpp

@ -0,0 +1,40 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#include <libp2p/multi/content_identifier.hpp>
#include <boost/format.hpp>
#include <libp2p/common/hexutil.hpp>
namespace libp2p::multi {
// Member-wise construction: the arguments are stored without any validation;
// the multihash is taken by value and moved into place.
ContentIdentifier::ContentIdentifier(Version version,
                                     MulticodecType::Code content_type,
                                     Multihash content_address)
    : version(version),
      content_type(content_type),
      content_address(std::move(content_address)) {}
std::string ContentIdentifier::toPrettyString(const std::string &base) {
/// TODO(Harrm) FIL-14: hash type is a subset of multicodec type, better move them
/// to one place
std::string hash_type = MulticodecType::getName(
static_cast<MulticodecType::Code>(content_address.getType()));
std::string hash_hex = common::hex_lower(content_address.getHash());
std::string hash_length =
std::to_string(content_address.getHash().size() * 8);
std::string v = "cidv" + std::to_string(static_cast<uint64_t>(version));
return (boost::format("%1% - %2% - %3% - %4%-%5%-%6%") % base % v
% MulticodecType::getName(content_type) % hash_type % hash_length
% hash_hex)
.str();
}
// Two CIDs are equal only if all three of their components are equal.
bool ContentIdentifier::operator==(const ContentIdentifier &c) const {
  if (version != c.version) {
    return false;
  }
  if (content_type != c.content_type) {
    return false;
  }
  return content_address == c.content_address;
}
} // namespace libp2p::multi

103
src/multi/content_identifier_codec.cpp

@ -0,0 +1,103 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#include <libp2p/multi/content_identifier_codec.hpp>
#include <libp2p/multi/multicodec_type.hpp>
#include <libp2p/multi/uvarint.hpp>
// Human-readable messages of the CID encoding errors
OUTCOME_CPP_DEFINE_CATEGORY(libp2p::multi, ContentIdentifierCodec::EncodeError,
                            error) {
  using EncodeError = libp2p::multi::ContentIdentifierCodec::EncodeError;
  switch (error) {
    case EncodeError::INVALID_CONTENT_TYPE:
      return "Content type does not conform the version";
    case EncodeError::INVALID_HASH_TYPE:
      return "Hash type is invalid; Must be sha256 in version 0";
    case EncodeError::INVALID_HASH_LENGTH:
      return "Hash length is invalid; Must be 32 bytes for sha256 in version 0";
  }
  // reached only for a value that is not an enumerator of EncodeError
  return "Unknown error";
}
// Human-readable messages of the CID decoding errors
OUTCOME_CPP_DEFINE_CATEGORY(libp2p::multi, ContentIdentifierCodec::DecodeError,
                            error) {
  using DecodeError = libp2p::multi::ContentIdentifierCodec::DecodeError;
  switch (error) {
    case DecodeError::EMPTY_VERSION:
      return "Version is absent";
    case DecodeError::EMPTY_MULTICODEC:
      return "Multicodec prefix is absent";
    case DecodeError::MALFORMED_VERSION:
      return "Version is malformed; Must be a non-negative integer";
    case DecodeError::RESERVED_VERSION:
      return "Version is greater than the latest version";
  }
  // reached only for a value that is not an enumerator of DecodeError
  return "Unknown error";
}
namespace libp2p::multi {
// Serializes a CID to bytes.
//  V1: varint(version) + varint(content type) + multihash bytes
//  V0: multihash bytes only; allowed solely for DAG_PB content addressed by a
//      32 byte sha256 digest, otherwise an EncodeError is returned
// Any other version value yields an empty byte vector.
outcome::result<std::vector<uint8_t>> ContentIdentifierCodec::encode(
    const ContentIdentifier &cid) {
  using Version = ContentIdentifier::Version;

  std::vector<uint8_t> encoded;
  switch (cid.version) {
    case Version::V1: {
      UVarint version_prefix(static_cast<uint64_t>(cid.version));
      common::append(encoded, version_prefix.toBytes());
      UVarint codec_prefix(cid.content_type);
      common::append(encoded, codec_prefix.toBytes());
      auto const &multihash_bytes = cid.content_address.toBuffer();
      common::append(encoded, multihash_bytes);
      break;
    }
    case Version::V0: {
      // the checks are performed in the same order as their errors are
      // declared: content type, hash type, hash length
      if (cid.content_type != MulticodecType::DAG_PB) {
        return EncodeError::INVALID_CONTENT_TYPE;
      }
      if (cid.content_address.getType() != HashType::sha256) {
        return EncodeError::INVALID_HASH_TYPE;
      }
      if (cid.content_address.getHash().size() != 32) {
        return EncodeError::INVALID_HASH_LENGTH;
      }
      auto const &multihash_bytes = cid.content_address.toBuffer();
      common::append(encoded, multihash_bytes);
      break;
    }
  }
  return encoded;
}
// Parses a serialized CID.
// A 34 byte input that starts with 0x12 0x20 is taken for a V0 CID, i.e. a
// bare multihash with DAG_PB as the implied content type. Everything else is
// read as varint(version) + varint(content type) + multihash, where only
// version 1 is accepted. Multihash parsing errors are passed to the caller.
outcome::result<ContentIdentifier> ContentIdentifierCodec::decode(
    gsl::span<uint8_t> bytes) {
  const bool looks_like_v0 =
      bytes.size() == 34 and bytes[0] == 0x12 and bytes[1] == 0x20;
  if (looks_like_v0) {
    OUTCOME_TRY(hash, Multihash::createFromBytes(bytes));
    return ContentIdentifier(ContentIdentifier::Version::V0,
                             MulticodecType::DAG_PB, std::move(hash));
  }

  auto version_varint = UVarint::create(bytes);
  if (!version_varint) {
    return DecodeError::EMPTY_VERSION;
  }
  auto version = version_varint.value().toUInt64();
  if (version <= 0) {
    return DecodeError::MALFORMED_VERSION;
  }
  if (version != 1) {
    return DecodeError::RESERVED_VERSION;
  }

  // version 1: the multicodec varint follows the version varint
  auto after_version = bytes.subspan(UVarint::calculateSize(bytes));
  auto multicodec_varint = UVarint::create(after_version);
  if (!multicodec_varint) {
    return DecodeError::EMPTY_MULTICODEC;
  }

  // the rest of the input, after both varints, is the multihash
  auto after_multicodec =
      after_version.subspan(UVarint::calculateSize(after_version));
  OUTCOME_TRY(hash, Multihash::createFromBytes(after_multicodec));
  return ContentIdentifier(
      ContentIdentifier::Version::V1,
      static_cast<MulticodecType::Code>(multicodec_varint.value().toUInt64()),
      std::move(hash));
}
} // namespace libp2p::multi

4
src/multi/multihash.cpp

@ -62,10 +62,10 @@ namespace libp2p::multi {
outcome::result<Multihash> Multihash::createFromHex(std::string_view hex) {
OUTCOME_TRY(buf, unhex(hex));
return Multihash::createFromBuffer(buf);
return Multihash::createFromBytes(buf);
}
outcome::result<Multihash> Multihash::createFromBuffer(
outcome::result<Multihash> Multihash::createFromBytes(
gsl::span<const uint8_t> b) {
if (b.size() < kHeaderSize) {
return Error::INPUT_TOO_SHORT;

9
src/multi/uvarint.cpp

@ -81,6 +81,15 @@ namespace libp2p::multi {
return *this;
}
// Varints are equal when their stored byte sequences have the same length and
// the same content.
bool UVarint::operator==(const UVarint &r) const {
  const auto &lhs = bytes_;
  const auto &rhs = r.bytes_;
  return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
}
// Negation of operator==
bool UVarint::operator!=(const UVarint &r) const {
  const bool same = (*this == r);
  return !same;
}
size_t UVarint::calculateSize(gsl::span<const uint8_t> varint_bytes) {
size_t s = 0;

4
src/peer/peer_id.cpp

@ -46,7 +46,7 @@ namespace libp2p::peer {
PeerId::FactoryResult PeerId::fromBase58(std::string_view id) {
OUTCOME_TRY(decoded_id, decodeBase58(id));
OUTCOME_TRY(hash, Multihash::createFromBuffer(decoded_id));
OUTCOME_TRY(hash, Multihash::createFromBytes(decoded_id));
if (hash.getType() != multi::HashType::sha256
&& hash.toBuffer().size() > kMaxInlineKeyLength) {
@ -90,7 +90,7 @@ namespace libp2p::peer {
}
PeerId::FactoryResult PeerId::fromBytes(gsl::span<const uint8_t> v) {
OUTCOME_TRY(mh, Multihash::createFromBuffer(v));
OUTCOME_TRY(mh, Multihash::createFromBytes(v));
return fromHash(mh);
}
} // namespace libp2p::peer

9
test/libp2p/multi/CMakeLists.txt

@ -29,3 +29,12 @@ target_link_libraries(multibase_codec_test
p2p_multibase_codec
p2p_literals
)
# unit tests of ContentIdentifier and ContentIdentifierCodec
addtest(cid_test
cid_test.cpp
)
# p2p_cid is the library under test; the test source also includes the
# multibase codec implementation and the literals header
target_link_libraries(cid_test
p2p_cid
p2p_multibase_codec
p2p_literals
)

131
test/libp2p/multi/cid_test.cpp

@ -0,0 +1,131 @@
/**
* Copyright Soramitsu Co., Ltd. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#include <gtest/gtest.h>
#include <libp2p/common/hexutil.hpp>
#include <libp2p/common/literals.hpp>
#include <libp2p/multi/content_identifier.hpp>
#include <libp2p/multi/content_identifier_codec.hpp>
#include <libp2p/multi/multibase_codec/multibase_codec_impl.hpp>
#include <libp2p/multi/multicodec_type.hpp>
#include <libp2p/multi/uvarint.hpp>
#include <testutil/outcome.hpp>
using libp2p::multi::ContentIdentifier;
using libp2p::multi::ContentIdentifierCodec;
using libp2p::multi::HashType;
using libp2p::multi::MultibaseCodec;
using libp2p::multi::MultibaseCodecImpl;
using libp2p::multi::MulticodecType;
using libp2p::multi::Multihash;
using libp2p::multi::UVarint;
using libp2p::common::operator""_multihash;
using libp2p::common::operator""_unhex;
// Both fixtures are written as hex: 0x12 (sha2-256 code), 0x20 (digest length,
// 32 bytes), then the digest itself.
// Multihash with an all-zero digest
const Multihash ZERO_MULTIHASH =
"12200000000000000000000000000000000000000000000000000000000000000000"_multihash;
// Multihash with a non-trivial digest
const Multihash EXAMPLE_MULTIHASH =
"12206e6ff7950a36187a801613426e858dce686cd7d7e3c0fc42ee0330072d245c95"_multihash;
// The pretty string consists of the base name, the CID version, the content
// type name and the hash description (name, digest bits, digest hex).
TEST(CidTest, PrettyString) {
  using Version = ContentIdentifier::Version;
  using libp2p::common::hex_lower;

  ContentIdentifier raw_v1(Version::V1, MulticodecType::IDENTITY,
                           ZERO_MULTIHASH);
  const std::string expected_v1 = "base58 - cidv1 - raw - sha2-256-256-"
      + hex_lower(ZERO_MULTIHASH.getHash());
  ASSERT_EQ(raw_v1.toPrettyString("base58"), expected_v1);

  ContentIdentifier dag_pb_v0(Version::V0, MulticodecType::DAG_PB,
                              EXAMPLE_MULTIHASH);
  const std::string expected_v0 = "base64 - cidv0 - dag-pb - sha2-256-256-"
      + hex_lower(EXAMPLE_MULTIHASH.getHash());
  ASSERT_EQ(dag_pb_v0.toPrettyString("base64"), expected_v0);
}
// Parameterized fixture: each parameter pairs a CID with the expected result
// of encoding it, which is either the encoded bytes or an error
class CidEncodeTest
: public testing::TestWithParam<
std::pair<ContentIdentifier, outcome::result<std::vector<uint8_t>>>> {
};
// The constructor keeps the multihash it is given. It does not validate the
// combination of version and content type: V0 with IDENTITY is accepted here.
TEST(CidTest, Create) {
  using Version = ContentIdentifier::Version;
  ContentIdentifier created(Version::V0, MulticodecType::IDENTITY,
                            EXAMPLE_MULTIHASH);
  ASSERT_EQ(created.content_address, EXAMPLE_MULTIHASH);
}
// Encodes the CID from the parameter and checks the outcome against the
// expectation: equal bytes when a value is expected, equal error otherwise.
TEST_P(CidEncodeTest, Encode) {
  auto [cid, expectation] = GetParam();
  auto bytes = ContentIdentifierCodec::encode(cid);
  if (expectation) {
    // make sure there is a value before reading it
    ASSERT_TRUE(bytes) << bytes.error().message();
    const auto &bytes_value = bytes.value();
    const auto &expectation_value = expectation.value();
    // whole-vector comparison: a three-iterator std::equal would not compare
    // the lengths, so it would read past the end of a shorter expectation and
    // accept an empty or truncated encoding
    ASSERT_EQ(bytes_value, expectation_value)
        << libp2p::common::hex_lower(bytes_value);
  } else {
    // make sure there is an error before reading it
    ASSERT_FALSE(bytes) << "encoding was expected to fail";
    ASSERT_EQ(bytes.error(), expectation.error()) << bytes.error().message();
  }
}
// Parameterized fixture: each parameter pairs serialized CID bytes with the
// expected result of decoding them, which is either a CID or an error
class CidDecodeTest
    : public testing::TestWithParam<
          std::pair<std::vector<uint8_t>, outcome::result<ContentIdentifier>>> {
 public:
  // `override` lets the compiler verify that this really replaces the
  // framework's SetUp hook
  void SetUp() override {
    base_codec = std::make_shared<MultibaseCodecImpl>();
  }

  // multibase codec, recreated before every test
  std::shared_ptr<MultibaseCodec> base_codec;
};
// Decodes the bytes from the parameter and checks the outcome against the
// expectation: equal CID when a value is expected, equal error otherwise.
TEST_P(CidDecodeTest, Decode) {
  auto [cid_bytes, expectation] = GetParam();
  auto cid = ContentIdentifierCodec::decode(cid_bytes);
  if (expectation) {
    // assert the state first, so that an unexpected failure is reported with
    // the decoder's message instead of an exception from value()
    ASSERT_TRUE(cid) << cid.error().message();
    ASSERT_EQ(cid.value(), expectation.value());
  } else {
    // same for an unexpected success and error()
    ASSERT_FALSE(cid) << "decoding was expected to fail";
    ASSERT_EQ(cid.error(), expectation.error()) << cid.error().message();
  }
}
class CidEncodeDecodeTest : public testing::TestWithParam<ContentIdentifier> {};
TEST_P(CidEncodeDecodeTest, DecodedMatchesOriginal) {
auto cid = GetParam();
EXPECT_OUTCOME_TRUE(bytes, ContentIdentifierCodec::encode(cid));
EXPECT_OUTCOME_TRUE(dec_cid, ContentIdentifierCodec::decode(bytes));
ASSERT_EQ(cid, dec_cid);
}
// Cases for CidEncodeTest: {CID to encode, expected bytes or expected error}
const std::vector<
std::pair<ContentIdentifier, outcome::result<std::vector<uint8_t>>>>
// a V0 CID whose content type is not DAG_PB must be rejected
encodeSuite{{ContentIdentifier(ContentIdentifier::Version::V0,
MulticodecType::SHA1, ZERO_MULTIHASH),
ContentIdentifierCodec::EncodeError::INVALID_CONTENT_TYPE},
// a valid V0 CID is encoded as its bare multihash
{ContentIdentifier(ContentIdentifier::Version::V0,
MulticodecType::DAG_PB, ZERO_MULTIHASH),
ZERO_MULTIHASH.toBuffer()}};
INSTANTIATE_TEST_CASE_P(EncodeTests, CidEncodeTest,
testing::ValuesIn(encodeSuite));
// Cases for CidDecodeTest: {bytes to decode, expected CID or expected error}
const std::vector<
std::pair<std::vector<uint8_t>, outcome::result<ContentIdentifier>>>
// a bare sha2-256 multihash is decoded as a V0 CID with DAG_PB content type
decodeSuite{{EXAMPLE_MULTIHASH.toBuffer(),
ContentIdentifier(ContentIdentifier::Version::V0,
MulticodecType::DAG_PB, EXAMPLE_MULTIHASH)}};
INSTANTIATE_TEST_CASE_P(DecodeTests, CidDecodeTest,
testing::ValuesIn(decodeSuite));
// Cases for CidEncodeDecodeTest: one V0 CID and two V1 CIDs with different
// content types and multihashes
const std::vector<ContentIdentifier> encodeDecodeSuite = {
ContentIdentifier(ContentIdentifier::Version::V0, MulticodecType::DAG_PB,
EXAMPLE_MULTIHASH),
ContentIdentifier(ContentIdentifier::Version::V1, MulticodecType::IDENTITY,
ZERO_MULTIHASH),
ContentIdentifier(ContentIdentifier::Version::V1, MulticodecType::SHA1,
EXAMPLE_MULTIHASH)};
INSTANTIATE_TEST_CASE_P(EncodeDecodeTest, CidEncodeDecodeTest,
testing::ValuesIn(encodeDecodeSuite));

4
test/libp2p/multi/multihash_test.cpp

@ -76,11 +76,11 @@ TEST(Multihash, FromToBuffer) {
auto hash = "8203020304"_unhex;
ASSERT_NO_THROW({
auto m = Multihash::createFromBuffer(hash).value();
auto m = Multihash::createFromBytes(hash).value();
ASSERT_EQ(m.toBuffer(), hash);
});
ByteArray v{2, 3, 1, 3};
ASSERT_FALSE(Multihash::createFromBuffer(v))
ASSERT_FALSE(Multihash::createFromBytes(v))
<< "Length in the header does not equal actual length";
}

Loading…
Cancel
Save