LCOV - code coverage report
Current view: top level - src - Decoder.cpp (source / functions) Hit Total Coverage
Test: cpp-bencoding code coverage Lines: 108 111 97.3 %
Date: 2018-04-21 15:28:44 Functions: 21 21 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /**
       2             : * @file      Decoder.cpp
       3             : * @copyright (c) 2014 by Petr Zemek (s3rvac@gmail.com) and contributors
       4             : * @license   BSD, see the @c LICENSE file for more details
       5             : * @brief     Implementation of the Decoder class.
       6             : */
       7             : 
       8             : #include "Decoder.h"
       9             : 
      10             : #include <cassert>
      11             : #include <regex>
      12             : #include <sstream>
      13             : 
      14             : #include "BDictionary.h"
      15             : #include "BInteger.h"
      16             : #include "BList.h"
      17             : #include "BString.h"
      18             : #include "Utils.h"
      19             : 
      20             : namespace bencoding {
      21             : 
      22             : /**
      23             : * @brief Constructs a new exception with the given message.
      24             : */
      25          25 : DecodingError::DecodingError(const std::string &what):
      26          25 :     std::runtime_error(what) {}
      27             : 
      28             : /**
      29             : * @brief Constructs a decoder.
      30             : */
      31          38 : Decoder::Decoder() {}
      32             : 
      33             : /**
      34             : * @brief Creates a new decoder.
      35             : */
      36          38 : std::unique_ptr<Decoder> Decoder::create() {
      37          38 :     return std::unique_ptr<Decoder>(new Decoder());
      38             : }
      39             : 
      40             : /**
      41             : * @brief Decodes the given bencoded @a data and returns them.
      42             : *
      43             : * If there are some characters left after the decoded data, this function
      44             : * throws DecodingError.
      45             : */
      46          38 : std::unique_ptr<BItem> Decoder::decode(const std::string &data) {
      47          76 :     std::istringstream input(data);
      48          38 :     auto decodedData = decode(input);
      49          16 :     validateInputDoesNotContainUndecodedCharacters(input);
      50          30 :     return decodedData;
      51             : }
      52             : 
      53             : /**
      54             : * @brief Reads the data from the given @a input, decodes them and returns them.
      55             : *
      56             : * If there are some characters left after the decoding, they are left in @a
      57             : * input, i.e. they are not read. This behavior differs for the overload of
      58             : * decode() that takes @c std::string as the input.
      59             : */
      60          63 : std::unique_ptr<BItem> Decoder::decode(std::istream &input) {
      61          63 :     switch (input.peek()) {
      62           6 :         case 'd':
      63           6 :             return decodeDictionary(input);
      64          29 :         case 'i':
      65          29 :             return decodeInteger(input);
      66           6 :         case 'l':
      67           6 :             return decodeList(input);
      68          13 :         case '0':
      69             :         case '1':
      70             :         case '2':
      71             :         case '3':
      72             :         case '4':
      73             :         case '5':
      74             :         case '6':
      75             :         case '7':
      76             :         case '8':
      77             :         case '9':
      78          13 :             return decodeString(input);
      79           9 :         default:
      80          18 :             throw DecodingError(std::string("unexpected character: '") +
      81          27 :                 static_cast<char>(input.peek()) + "'");
      82             :     }
      83             : 
      84             :     assert(false && "should never happen");
      85             :     return std::unique_ptr<BItem>();
      86             : }
      87             : 
      88             : /**
      89             : * @brief Reads @a expected_char from @a input and discards it.
      90             : */
      91          31 : void Decoder::readExpectedChar(std::istream &input, char expected_char) const {
      92          31 :     int c = input.get();
      93          31 :     if (c != expected_char) {
      94           0 :         throw DecodingError(std::string("expected '") + expected_char +
      95           0 :             "', got '" + static_cast<char>(c) + "'");
      96             :     }
      97          31 : }
      98             : 
      99             : /**
     100             : * @brief Decodes a dictionary from @a input.
     101             : *
     102             : * @par Format
     103             : * @code
     104             : * d<bencoded string><bencoded element>e
     105             : * @endcode
     106             : *
     107             : * @par Example
     108             : * @code
     109             : * d3:cow3:moo4:spam4:eggse represents the dictionary {"cow": "moo", "spam": "eggs"}
     110             : * d4:spaml1:a1:bee represents the dictionary {"spam": ["a", "b"]}
     111             : * @endcode
     112             : *
     113             : * The keys must be bencoded strings. The values may be any bencoded type,
     114             : * including integers, strings, lists, and other dictionaries. This function
     115             : * supports decoding of dictionaries whose keys are not lexicographically sorted
     116             : * (according to the <a
     117             : * href="https://wiki.theory.org/BitTorrentSpecification#Bencoding">specification</a>,
     118             : * they must be sorted).
     119             : */
     120           6 : std::unique_ptr<BDictionary> Decoder::decodeDictionary(std::istream &input) {
     121           6 :     readExpectedChar(input, 'd');
     122           6 :     auto bDictionary = decodeDictionaryItemsIntoDictionary(input);
     123           4 :     readExpectedChar(input, 'e');
     124           4 :     return bDictionary;
     125             : }
     126             : 
     127             : /**
     128             : * @brief Decodes items from @a input, adds them to a dictionary, and returns
     129             : *        that dictionary.
     130             : */
     131           6 : std::unique_ptr<BDictionary> Decoder::decodeDictionaryItemsIntoDictionary(
     132             :         std::istream &input) {
     133           6 :     auto bDictionary = BDictionary::create();
     134          16 :     while (input && input.peek() != 'e') {
     135          12 :         std::shared_ptr<BString> key(decodeDictionaryKey(input));
     136          10 :         std::shared_ptr<BItem> value(decodeDictionaryValue(input));
     137           5 :         (*bDictionary)[key] = value;
     138             :     }
     139           4 :     return bDictionary;
     140             : }
     141             : 
     142             : /**
     143             : * @brief Decodes a dictionary key from @a input.
     144             : */
     145           7 : std::shared_ptr<BString> Decoder::decodeDictionaryKey(std::istream &input) {
     146          13 :     std::shared_ptr<BItem> key(decode(input));
     147             :     // A dictionary key has to be a string.
     148           6 :     std::shared_ptr<BString> keyAsBString(key->as<BString>());
     149           6 :     if (!keyAsBString) {
     150             :         throw DecodingError(
     151             :             "found a dictionary key that is not a bencoded string"
     152           1 :         );
     153             :     }
     154          10 :     return keyAsBString;
     155             : }
     156             : 
     157             : /**
     158             : * @brief Decodes a dictionary value from @a input.
     159             : */
     160           5 : std::unique_ptr<BItem> Decoder::decodeDictionaryValue(std::istream &input) {
     161           5 :     return decode(input);
     162             : }
     163             : 
     164             : /**
     165             : * @brief Decodes an integer from @a input.
     166             : *
     167             : * @par Format
     168             : * @code
     169             : * i<integer encoded in base ten ASCII>e
     170             : * @endcode
     171             : *
     172             : * @par Example
     173             : * @code
     174             : * i3e represents the integer 3
     175             : * @endcode
     176             : *
     177             : * Moreover, only the significant digits should be used, one cannot pad the
     178             : * integer with zeroes, such as @c i04e (see the <a
     179             : * href="https://wiki.theory.org/BitTorrentSpecification#Bencoding">
     180             : * specification</a>).
     181             : */
     182          29 : std::unique_ptr<BInteger> Decoder::decodeInteger(std::istream &input) const {
     183          29 :     return decodeEncodedInteger(readEncodedInteger(input));
     184             : }
     185             : 
     186             : /**
     187             : * @brief Reads an encoded integer from @a input.
     188             : */
     189          29 : std::string Decoder::readEncodedInteger(std::istream &input) const {
     190             :     // See the description of decodeInteger() for the format and example.
     191          29 :     std::string encodedInteger;
     192          29 :     bool encodedIntegerReadCorrectly = readUntil(input, encodedInteger, 'e');
     193          29 :     if (!encodedIntegerReadCorrectly) {
     194           2 :         throw DecodingError("error during the decoding of an integer near '" +
     195           3 :             encodedInteger + "'");
     196             :     }
     197             : 
     198          28 :     return encodedInteger;
     199             : }
     200             : 
     201             : /**
     202             : * @brief Decodes the given encoded integer.
     203             : */
     204          28 : std::unique_ptr<BInteger> Decoder::decodeEncodedInteger(
     205             :         const std::string &encodedInteger) const {
     206             :     // See the description of decodeInteger() for the format and example.
     207          56 :     std::regex integerRegex("i([-+]?(0|[1-9][0-9]*))e");
     208          56 :     std::smatch match;
     209          28 :     bool valid = std::regex_match(encodedInteger, match, integerRegex);
     210          28 :     if (!valid) {
     211          22 :         throw DecodingError("encountered an encoded integer of invalid format: '" +
     212          33 :             encodedInteger + "'");
     213             :     }
     214             : 
     215             :     BInteger::ValueType integerValue;
     216          17 :     strToNum(match[1].str(), integerValue);
     217          34 :     return BInteger::create(integerValue);
     218             : }
     219             : 
     220             : /**
     221             : * @brief Decodes a list from @a input.
     222             : *
     223             : * @par Format
     224             : * @code
     225             : * l<bencoded values>e
     226             : * @endcode
     227             : *
     228             : * @par Example
     229             : * @code
     230             : * l4:spam4:eggse represents a list containing two strings "spam" and "eggs"
     231             : * @endcode
     232             : */
     233           6 : std::unique_ptr<BList> Decoder::decodeList(std::istream &input) {
     234           6 :     readExpectedChar(input, 'l');
     235           6 :     auto bList = decodeListItemsIntoList(input);
     236           3 :     readExpectedChar(input, 'e');
     237           3 :     return bList;
     238             : }
     239             : 
     240             : /**
     241             : * @brief Decodes items from @a input, appends them to a list, and returns that
     242             : *        list.
     243             : */
     244           6 : std::unique_ptr<BList> Decoder::decodeListItemsIntoList(std::istream &input) {
     245           6 :     auto bList = BList::create();
     246          16 :     while (input && input.peek() != 'e') {
     247           8 :         bList->push_back(decode(input));
     248             :     }
     249           3 :     return bList;
     250             : }
     251             : 
     252             : /**
     253             : * @brief Decodes a string from @a input.
     254             : *
     255             : * @par Format
     256             : * @code
     257             : * <string length encoded in base ten ASCII>:<string data>
     258             : * @endcode
     259             : *
     260             : * @par Example
     261             : * @code
     262             : * 4:test represents the string "test"
     263             : * @endcode
     264             : */
     265          13 : std::unique_ptr<BString> Decoder::decodeString(std::istream &input) const {
     266          13 :     std::string::size_type stringLength(readStringLength(input));
     267          12 :     readExpectedChar(input, ':');
     268          23 :     std::string str(readStringOfGivenLength(input, stringLength));
     269          22 :     return BString::create(str);
     270             : }
     271             : 
     272             : /**
     273             : * @brief Reads the string length from @a input, validates it, and returns it.
     274             : */
     275          13 : std::string::size_type Decoder::readStringLength(std::istream &input) const {
     276          26 :     std::string stringLengthInASCII;
     277          13 :     bool stringLengthInASCIIReadCorrectly = readUpTo(input, stringLengthInASCII, ':');
     278          13 :     if (!stringLengthInASCIIReadCorrectly) {
     279           2 :         throw DecodingError("error during the decoding of a string near '" +
     280           3 :             stringLengthInASCII + "'");
     281             :     }
     282             : 
     283             :     std::string::size_type stringLength;
     284          12 :     bool stringLengthIsValid = strToNum(stringLengthInASCII, stringLength);
     285          12 :     if (!stringLengthIsValid) {
     286           0 :         throw DecodingError("invalid string length: '" + stringLengthInASCII + "'");
     287             :     }
     288             : 
     289          24 :     return stringLength;
     290             : }
     291             : 
     292             : /**
     293             : * @brief Reads a string of the given @a length from @a input and returns it.
     294             : */
     295          12 : std::string Decoder::readStringOfGivenLength(std::istream &input,
     296             :         std::string::size_type length) const {
     297          12 :     std::string str(length, char());
     298          12 :     input.read(&str[0], length);
     299          12 :     std::string::size_type numOfReadChars(input.gcount());
     300          12 :     if (numOfReadChars != length) {
     301           2 :         throw DecodingError("expected a string containing " + std::to_string(length) +
     302           4 :             " characters, but read only " + std::to_string(numOfReadChars) +
     303           3 :             " characters");
     304             :     }
     305          11 :     return str;
     306             : }
     307             : 
     308             : /**
     309             : * @brief Throws DecodingError if @a input has not been completely read.
     310             : */
     311          16 : void Decoder::validateInputDoesNotContainUndecodedCharacters(std::istream &input) {
     312          16 :     if (input.peek() != std::char_traits<char>::eof()) {
     313           1 :         throw DecodingError("input contains undecoded characters");
     314             :     }
     315          15 : }
     316             : 
     317             : /**
     318             : * @brief Decodes the given bencoded @a data and returns them.
     319             : *
     320             : * This function can be handy if you just want to decode bencoded data without
     321             : * explicitly creating a decoder and calling @c decode() on it.
     322             : *
     323             : * See Decoder::decode() for more details.
     324             : */
     325           1 : std::unique_ptr<BItem> decode(const std::string &data) {
     326           2 :     auto decoder = Decoder::create();
     327           2 :     return decoder->decode(data);
     328             : }
     329             : 
     330             : /**
     331             : * @brief Reads all the data from the given @a input, decodes them and returns
     332             : *        them.
     333             : *
     334             : * This function can be handy if you just want to decode bencoded data without
     335             : * explicitly creating a decoder and calling @c decode() on it.
     336             : *
     337             : * See Decoder::decode() for more details.
     338             : */
     339           1 : std::unique_ptr<BItem> decode(std::istream &input) {
     340           2 :     auto decoder = Decoder::create();
     341           2 :     return decoder->decode(input);
     342             : }
     343             : 
     344             : } // namespace bencoding

Generated by: LCOV version 1.13