From d65618c33bdc029ceda54bf4ee3b7ffd8d9490ce Mon Sep 17 00:00:00 2001 From: Alexey Date: Fri, 30 Jul 2021 16:10:09 +0300 Subject: [PATCH] multihash-and-fixes - Updated (#158) * multihash holds sptr to const data for performance * multihash updates for performance * iterator-related bugs in peer routing table fixed * fix pr issues Signed-off-by: Alexey-N-Chernyshov Co-authored-by: Artem Co-authored-by: Igor Egorov --- include/libp2p/multi/multihash.hpp | 32 +++- .../kademlia/impl/peer_routing_table_impl.hpp | 68 +++----- src/multi/CMakeLists.txt | 2 +- src/multi/multihash.cpp | 100 +++++++---- .../kademlia/impl/peer_routing_table_impl.cpp | 159 +++++++++++++----- 5 files changed, 246 insertions(+), 115 deletions(-) diff --git a/include/libp2p/multi/multihash.hpp b/include/libp2p/multi/multihash.hpp index cbc08f92..d1261493 100644 --- a/include/libp2p/multi/multihash.hpp +++ b/include/libp2p/multi/multihash.hpp @@ -24,6 +24,12 @@ namespace libp2p::multi { */ class Multihash { public: + Multihash(const Multihash &other) = default; + Multihash &operator=(const Multihash &other) = default; + Multihash(Multihash &&other) noexcept = default; + Multihash &operator=(Multihash &&other) noexcept = default; + ~Multihash() = default; + using Buffer = common::ByteArray; static constexpr uint8_t kMaxHashLength = 127; @@ -86,6 +92,11 @@ namespace libp2p::multi { */ const Buffer &toBuffer() const; + /** + * @return Pre-calculated hash for std containers + */ + size_t stdHash() const; + bool operator==(const Multihash &other) const; bool operator!=(const Multihash &other) const; bool operator<(const Multihash &other) const; @@ -110,9 +121,20 @@ namespace libp2p::multi { * Contains a one byte hash type, a one byte hash length, and the stored * hash itself */ - std::vector data_; - uint8_t hash_offset_{}; ///< size of non-hash data from the beginning - HashType type_; + struct Data { + // TODO(artem): move to small_vector + // as soon as toBuffer() -> span is acceptable + std::vector bytes; + uint8_t hash_offset{}; ///< size of non-hash data from the beginning + HashType type; + size_t std_hash; ///< Hash for unordered containers + + Data(HashType t, gsl::span h); + }; + + const Data& data() const; + + std::shared_ptr data_; }; } // namespace libp2p::multi @@ -120,7 +142,9 @@ namespace libp2p::multi { namespace std { template <> struct hash { - size_t operator()(const libp2p::multi::Multihash &x) const; + size_t operator()(const libp2p::multi::Multihash &x) const { + return x.stdHash(); + } }; } // namespace std diff --git a/include/libp2p/protocol/kademlia/impl/peer_routing_table_impl.hpp b/include/libp2p/protocol/kademlia/impl/peer_routing_table_impl.hpp index a0dc4bd8..141f4cef 100644 --- a/include/libp2p/protocol/kademlia/impl/peer_routing_table_impl.hpp +++ b/include/libp2p/protocol/kademlia/impl/peer_routing_table_impl.hpp @@ -9,7 +9,8 @@ #include #include -#include +#include +#include #include #include @@ -50,58 +51,41 @@ namespace libp2p::protocol::kademlia { return std::memcmp(d1.data(), d2.data(), size) < 0; } - Hash256 hfrom; + Hash256 hfrom{}; }; + /** * Single bucket which holds peers. */ - class Bucket : public std::deque { + class Bucket { public: - void truncate(size_t limit) { - if (size() > limit) { - erase(std::next(begin(), limit), end()); - } - } + size_t size() const; - std::vector peerIds() const { - std::vector peerIds; - peerIds.reserve(size()); - std::transform(begin(), end(), std::back_inserter(peerIds), - [](const auto &bpi) { return bpi.peer_id; }); - return peerIds; - } + void append(const Bucket &bucket); - bool contains(const peer::PeerId &p) { - auto it = std::find_if(begin(), end(), - [=](const auto &bpi) { return bpi.peer_id == p; }); - return it != end(); - } + // sort bucket in ascending order by XOR distance from node_id + void sort(const NodeId &node_id); - bool remove(const peer::PeerId &p) { - // this shifts elements to the end - auto it = std::remove_if( - begin(), end(), [&](const auto &bpi) { return bpi.peer_id == p; }); - if (it != end()) { - erase(it); - return true; - } + auto find(const peer::PeerId &p) const; - return false; - } + bool moveToFront(const PeerId &pid); - Bucket split(size_t commonLenPrefix, const NodeId &target) { - Bucket b{}; - // remove shifts all elements "to be removed" to the end, other elements - // preserve their relative order - auto new_end = std::remove_if(begin(), end(), [&](const auto &bpi) { - return bpi.node_id.commonPrefixLen(target) > commonLenPrefix; - }); + void emplaceToFront(const PeerId &pid, bool is_replaceable); - b.assign(std::make_move_iterator(new_end), - std::make_move_iterator(end())); + boost::optional removeReplaceableItem(); - return b; - } + void truncate(size_t limit); + + std::vector peerIds() const; + + bool contains(const peer::PeerId &p) const; + + bool remove(const peer::PeerId &p); + + Bucket split(size_t commonLenPrefix, const NodeId &target); + + private: + std::list peers_; }; class PeerRoutingTableImpl @@ -122,7 +106,7 @@ namespace libp2p::protocol::kademlia { std::shared_ptr bus); outcome::result update(const peer::PeerId &pid, bool is_permanent, - bool is_connected=false) override; + bool is_connected = false) override; void remove(const peer::PeerId &peer_id) override; diff --git a/src/multi/CMakeLists.txt b/src/multi/CMakeLists.txt index 788d801e..67e59dc5 100644 --- a/src/multi/CMakeLists.txt +++ b/src/multi/CMakeLists.txt @@ -21,7 +21,7 @@ libp2p_add_library(p2p_multihash ) target_link_libraries(p2p_multihash p2p_hexutil - p2p_uvarint + p2p_varint_prefix_reader Boost::boost ) diff --git a/src/multi/multihash.cpp b/src/multi/multihash.cpp index ae92d58f..d5fea61b 100644 --- a/src/multi/multihash.cpp +++ b/src/multi/multihash.cpp @@ -5,12 +5,11 @@ #include -#include #include -#include +#include #include #include -#include +#include using libp2p::common::ByteArray; using libp2p::common::hex_upper; @@ -37,15 +36,47 @@ OUTCOME_CPP_DEFINE_CATEGORY(libp2p::multi, Multihash::Error, e) { namespace libp2p::multi { - Multihash::Multihash(HashType type, gsl::span hash) { - type_ = type; - UVarint uvarint{type}; - auto &&bytes = uvarint.toBytes(); - data_.insert(data_.end(), bytes.begin(), bytes.end()); - BOOST_ASSERT(hash.size() <= std::numeric_limits::max()); - data_.push_back(static_cast(hash.size())); - hash_offset_ = data_.size(); - data_.insert(data_.end(), hash.begin(), hash.end()); + Multihash::Multihash(HashType type, gsl::span hash) + : data_(std::make_shared(type, hash)) {} + + namespace { + template + inline void appendVarint(Buffer &buffer, uint64_t t) { + do { + uint8_t byte = t & 0x7F; + t >>= 7; + if (t != 0) { + byte |= 0x80; + } + buffer.push_back(byte); + } while (t > 0); + } + } // namespace + + Multihash::Data::Data(HashType t, gsl::span h) : type(t) { + bytes.reserve(h.size() + 4); + appendVarint(bytes, type); + BOOST_ASSERT(h.size() <= std::numeric_limits::max()); + bytes.push_back(static_cast(h.size())); + hash_offset = bytes.size(); + bytes.insert(bytes.end(), h.begin(), h.end()); + std_hash = boost::hash_range(bytes.begin(), bytes.end()); + } + + const Multihash::Data &Multihash::data() const { +#if NDEBUG + if (data_ == nullptr) { + log::createLogger("Multihash")->critical("attempt to use moved object"); + throw std::runtime_error("attempt to use moved multihash"); + } +#else + BOOST_ASSERT(data_); +#endif + return *data_; + } + + size_t Multihash::stdHash() const { + return data().std_hash; } outcome::result Multihash::create(HashType type, @@ -68,11 +99,18 @@ namespace libp2p::multi { return Error::INPUT_TOO_SHORT; } - UVarint varint(b); + basic::VarintPrefixReader vr; + if (vr.consume(b) != basic::VarintPrefixReader::kReady) { + return Error::INPUT_TOO_SHORT; + } - const auto type = static_cast(varint.toUInt64()); - uint8_t length = b[varint.size()]; - gsl::span hash = b.subspan(varint.size() + 1); + const auto type = static_cast(vr.value()); + if (b.empty()) { + return Error::INPUT_TOO_SHORT; + } + + const uint8_t length = b[0]; + gsl::span hash = b.subspan(1); if (length == 0) { return Error::ZERO_INPUT_LENGTH; @@ -86,23 +124,29 @@ namespace libp2p::multi { } const HashType &Multihash::getType() const { - return type_; + return data().type; } gsl::span Multihash::getHash() const { - return gsl::span(data_).subspan(hash_offset_); + const auto &d = data(); + return gsl::span(d.bytes).subspan(d.hash_offset); } std::string Multihash::toHex() const { - return hex_upper(data_); + return hex_upper(data().bytes); } const common::ByteArray &Multihash::toBuffer() const { - return data_; + return data().bytes; } bool Multihash::operator==(const Multihash &other) const { - return this->data_ == other.data_ && this->type_ == other.type_; + const auto &a = data(); + const auto &b = other.data(); + if (data_ == other.data_) { + return true; + } + return a.bytes == b.bytes && a.type == b.type; } bool Multihash::operator!=(const Multihash &other) const { @@ -110,14 +154,12 @@ namespace libp2p::multi { } bool Multihash::operator<(const class libp2p::multi::Multihash &other) const { - return this->type_ < other.type_ - || (this->type_ == other.type_ && this->data_ < other.data_); + const auto &a = data(); + const auto &b = other.data(); + if (a.type == b.type) { + return a.bytes < b.bytes; + } + return a.type < b.type; } } // namespace libp2p::multi - -size_t std::hash::operator()( - const libp2p::multi::Multihash &x) const { - const auto &container = x.toBuffer(); - return boost::hash_range(container.begin(), container.end()); -} diff --git a/src/protocol/kademlia/impl/peer_routing_table_impl.cpp b/src/protocol/kademlia/impl/peer_routing_table_impl.cpp index 70530938..2f59c34f 100644 --- a/src/protocol/kademlia/impl/peer_routing_table_impl.cpp +++ b/src/protocol/kademlia/impl/peer_routing_table_impl.cpp @@ -36,6 +36,103 @@ namespace { namespace libp2p::protocol::kademlia { + size_t Bucket::size() const { + return peers_.size(); + } + + void Bucket::append(const Bucket &bucket) { + peers_.insert(peers_.end(), bucket.peers_.begin(), bucket.peers_.end()); + } + + void Bucket::sort(const NodeId &node_id) { + XorDistanceComparator cmp(node_id); + peers_.sort(cmp); + } + + auto Bucket::find(const peer::PeerId &p) const { + return std::find_if(peers_.begin(), peers_.end(), + [&p](const auto &i) { return i.peer_id == p; }); + } + + bool Bucket::moveToFront(const PeerId &pid) { + auto it = find(pid); + if (it != peers_.end()) { + if (it != peers_.begin()) { + peers_.splice(peers_.begin(), peers_, it); + } + return false; + } + return true; + } + + void Bucket::emplaceToFront(const PeerId &pid, bool is_replaceable) { + peers_.emplace(peers_.begin(), pid, is_replaceable); + } + + boost::optional Bucket::removeReplaceableItem() { + boost::optional result; + + for (auto it = peers_.rbegin(); it != peers_.rend(); ++it) { + if (it->is_replaceable) { + result = std::move(it->peer_id); + peers_.erase((++it).base()); + break; + } + } + + return result; + } + + void Bucket::truncate(size_t limit) { + if (limit == 0) { + peers_.clear(); + } else if (peers_.size() > limit) { + peers_.erase(std::next(peers_.begin(), static_cast(limit)), + peers_.end()); + } + } + + std::vector Bucket::peerIds() const { + std::vector peerIds; + peerIds.reserve(peers_.size()); + std::transform(peers_.begin(), peers_.end(), std::back_inserter(peerIds), + [](const auto &bpi) { return bpi.peer_id; }); + return peerIds; + } + + bool Bucket::contains(const peer::PeerId &p) const { + return find(p) != peers_.end(); + } + + bool Bucket::remove(const peer::PeerId &p) { + auto it = find(p); + if (it != peers_.end()) { + peers_.erase(it); + return true; + } + + return false; + } + + Bucket Bucket::split(size_t commonLenPrefix, const NodeId &target) { + Bucket b{}; + + std::list new_peers; + + while (!peers_.empty()) { + auto it = peers_.begin(); + if (it->node_id.commonPrefixLen(target) > commonLenPrefix) { + b.peers_.splice(b.peers_.end(), peers_, it); + } else { + new_peers.splice(new_peers.end(), peers_, it); + } + } + + peers_.swap(new_peers); + + return b; + } + PeerRoutingTableImpl::PeerRoutingTableImpl( const Config &config, std::shared_ptr identity_manager, @@ -74,23 +171,36 @@ namespace libp2p::protocol::kademlia { // if this happens, search both surrounding buckets for nearby peers if (bucketId > 0) { auto &left = buckets_.at(bucketId - 1); - bucket.insert(bucket.end(), left.begin(), left.end()); + bucket.append(left); } if (bucketId < buckets_.size() - 1) { auto &right = buckets_.at(bucketId + 1); - bucket.insert(bucket.end(), right.begin(), right.end()); + bucket.append(right); } } // sort bucket in ascending order by XOR distance from local peer. - XorDistanceComparator cmp(node_id); - std::sort(bucket.begin(), bucket.end(), cmp); + bucket.sort(node_id); bucket.truncate(count); return bucket.peerIds(); } + namespace { + outcome::result replacePeer(Bucket &bucket, const peer::PeerId &pid, + bool is_replaceable, event::Bus &bus) { + const auto removed = bucket.removeReplaceableItem(); + if (!removed.has_value()) { + return PeerRoutingTableImpl::Error::PEER_REJECTED_NO_CAPACITY; + } + bus.getChannel().publish(removed.value()); + bucket.emplaceToFront(pid, is_replaceable); + bus.getChannel().publish(pid); + return true; + } + } // namespace + outcome::result PeerRoutingTableImpl::update(const peer::PeerId &pid, bool is_permanent, bool is_connected) { @@ -102,12 +212,7 @@ namespace libp2p::protocol::kademlia { // Trying to find and move to front if its a long lived connected peer if (is_connected) { - auto it = - std::find_if(bucket.begin(), bucket.end(), - [&pid](const auto &bpi) { return bpi.peer_id == pid; }); - if (it != bucket.end()) { - bucket.push_front(*it); - bucket.erase(it); + if (!bucket.moveToFront(pid)) { return false; } } else if (bucket.contains(pid)) { @@ -115,7 +220,7 @@ namespace libp2p::protocol::kademlia { } if (bucket.size() < config_.maxBucketSize) { - bucket.emplace_front(pid, !is_permanent); + bucket.emplaceToFront(pid, !is_permanent); bus_->getChannel().publish(pid); return true; } @@ -131,39 +236,15 @@ namespace libp2p::protocol::kademlia { auto resizedBucketId = getBucketId(buckets_, cpl); auto &resizedBucket = buckets_.at(resizedBucketId); if (resizedBucket.size() < config_.maxBucketSize) { - resizedBucket.emplace_front(pid, is_permanent); + resizedBucket.emplaceToFront(pid, !is_permanent); bus_->getChannel().publish(pid); return true; } - auto replaceablePeerIt = - std::find_if(resizedBucket.rbegin(), resizedBucket.rend(), - [](const auto &bpi) { return bpi.is_replaceable; }); - if (replaceablePeerIt == resizedBucket.rend()) { - return Error::PEER_REJECTED_NO_CAPACITY; - } - auto removedPeer = (*replaceablePeerIt).peer_id; - bus_->getChannel().publish(removedPeer); - std::advance(replaceablePeerIt, 1); - resizedBucket.erase(replaceablePeerIt.base()); - resizedBucket.emplace_front(pid, !is_permanent); - bus_->getChannel().publish(pid); - return true; + return replacePeer(resizedBucket, pid, !is_permanent, *bus_); } - auto replaceablePeerIt = - std::find_if(bucket.rbegin(), bucket.rend(), - [](const auto &bpi) { return bpi.is_replaceable; }); - if (replaceablePeerIt == bucket.rend()) { - return Error::PEER_REJECTED_NO_CAPACITY; - } - auto removedPeer = (*replaceablePeerIt).peer_id; - bus_->getChannel().publish(removedPeer); - std::advance(replaceablePeerIt, 1); - bucket.erase(replaceablePeerIt.base()); - bucket.emplace_front(pid, !is_permanent); - bus_->getChannel().publish(pid); - return true; + return replacePeer(bucket, pid, !is_permanent, *bus_); } void PeerRoutingTableImpl::nextBucket() { @@ -193,7 +274,7 @@ namespace libp2p::protocol::kademlia { std::vector PeerRoutingTableImpl::getAllPeers() const { std::vector vec; - for (auto &bucket : buckets_) { + for (const auto &bucket : buckets_) { auto peer_ids = bucket.peerIds(); vec.insert(vec.end(), peer_ids.begin(), peer_ids.end()); }