c++

Varint编码

原理

Varint 是一种紧凑的表示数字的方法。它用一个或多个字节来表示一个数字，值越小的数字使用越少的字节数。这能减少用来表示数字的字节数。
Varint 中的每个 byte 的最高位 bit 有特殊的含义，如果该位为 1，表示后续的 byte 也是该数字的一部分，如果该位为 0，则结束。其他的 7 个 bit 都用来表示数字。因此小于 128 的数字都可以用一个 byte 表示。大于 128 的数字，会用两个字节

如图所示(借用)

测试实现

varint.hpp


namespace varint
{

    // Maximum buffer size to encode 1 int32 using varint algorithm
    static const int MAX_VARINT_BYTES = 5;


    /** 
     * move the sign from 1st bit to last bit, so negative
     * integer can be encoded efficiently
     *  */
    inline std::uint32_t zig(std::int32_t n)
    {   
        return (n << 1) ^ (n >> 31);
    }   


    /** 
     * decode the zig.  zag( zig (x) ) = x
     * */
    inline std::int32_t zag(std::uint32_t n)
    {   
        std::int32_t result =  (n >> 1) ^ -static_cast<std::int32_t>(n & 1); 
        return result;
    }   
    

    inline int encodeVarint(char* target, std::int32_t source)
    {   
        // encode to zigzag
        unsigned int value = zig(source);
        target[0] = static_cast<char>(value | 0x80);
        if (value >= (1 << 7)) {
            target[1] = static_cast<char>((value >> 7) | 0x80);
            if (value >= (1 << 14)) {
                target[2] = static_cast<char>((value >> 14) | 0x80);
                if (value >= (1 << 21)) {
                    target[3] = static_cast<char>((value >> 21) | 0x80);
                    if (value >= (1 << 28)) {
                        target[4] = static_cast<char>(value >> 28);
                        return 5;
                    } else {
                        target[3] &= 0x7F;
                        return 4;
                    }   
                } else {
                    target[2] &= 0x7F;
                    return 3;
                }   
            } else {
                target[1] &= 0x7F;
                return 2;
            }   
        } else {
            target[0] &= 0x7F;
            return 1;
        }   
    }   


    inline bool decodeVarint(char* stream, std::int32_t & target)
    {   
        char first = *stream;
        if ( (first  & 0x80) == 0)  // fast path for one byte
        {   
            target = zag(  static_cast<std::uint32_t>( first ) );
            return true;
        }   
        std::uint32_t b;
        std::uint32_t result;
        b = first; result  = (b & 0x7F)      ; if (!(b & 0x80)) goto done;
        b = *(stream+1) ; result |= (b & 0x7F) <<  7; if (!(b & 0x80)) goto done;
        b = *(stream+2) ; result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
        b = *(stream+3) ; result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
        b = *(stream+4) ; result |=  b         << 28; if (!(b & 0x80)) goto done;
    done:
        target = zag( result );
        return true;
    }

}

main.cpp

#include "varint.hpp"
#include <iostream>
#include <cstdint>

int main( int argc, char * argv[] )
{
    std::int32_t value = 21000;
    std::int32_t result = 0;
    char tempbuf[varint::MAX_VARINT_BYTES]={0};
    std::cout << "size: " << varint::encodeVarint(tempbuf, value) << std::endl;
    std::cout << "buff: "<< tempbuf << std::endl;
    varint::decodeVarint(tempbuf, result);
    std::cout << "result:" << result << std::endl;
}