Line data Source code
1 : /**
2 : * @file Decoder.cpp
3 : * @copyright (c) 2014 by Petr Zemek (s3rvac@gmail.com) and contributors
4 : * @license BSD, see the @c LICENSE file for more details
5 : * @brief Implementation of the Decoder class.
6 : */
7 :
8 : #include "Decoder.h"
9 :
10 : #include <cassert>
11 : #include <regex>
12 : #include <sstream>
13 :
14 : #include "BDictionary.h"
15 : #include "BInteger.h"
16 : #include "BList.h"
17 : #include "BString.h"
18 : #include "Utils.h"
19 :
20 : namespace bencoding {
21 :
22 : /**
23 : * @brief Constructs a new exception with the given message.
24 : */
25 25 : DecodingError::DecodingError(const std::string &what):
26 25 : std::runtime_error(what) {}
27 :
28 : /**
29 : * @brief Constructs a decoder.
30 : */
31 38 : Decoder::Decoder() {}
32 :
33 : /**
34 : * @brief Creates a new decoder.
35 : */
36 38 : std::unique_ptr<Decoder> Decoder::create() {
37 38 : return std::unique_ptr<Decoder>(new Decoder());
38 : }
39 :
40 : /**
41 : * @brief Decodes the given bencoded @a data and returns them.
42 : *
43 : * If there are some characters left after the decoded data, this function
44 : * throws DecodingError.
45 : */
46 38 : std::unique_ptr<BItem> Decoder::decode(const std::string &data) {
47 76 : std::istringstream input(data);
48 38 : auto decodedData = decode(input);
49 16 : validateInputDoesNotContainUndecodedCharacters(input);
50 30 : return decodedData;
51 : }
52 :
53 : /**
54 : * @brief Reads the data from the given @a input, decodes them and returns them.
55 : *
56 : * If there are some characters left after the decoding, they are left in @a
57 : * input, i.e. they are not read. This behavior differs for the overload of
58 : * decode() that takes @c std::string as the input.
59 : */
60 63 : std::unique_ptr<BItem> Decoder::decode(std::istream &input) {
61 63 : switch (input.peek()) {
62 6 : case 'd':
63 6 : return decodeDictionary(input);
64 29 : case 'i':
65 29 : return decodeInteger(input);
66 6 : case 'l':
67 6 : return decodeList(input);
68 13 : case '0':
69 : case '1':
70 : case '2':
71 : case '3':
72 : case '4':
73 : case '5':
74 : case '6':
75 : case '7':
76 : case '8':
77 : case '9':
78 13 : return decodeString(input);
79 9 : default:
80 18 : throw DecodingError(std::string("unexpected character: '") +
81 27 : static_cast<char>(input.peek()) + "'");
82 : }
83 :
84 : assert(false && "should never happen");
85 : return std::unique_ptr<BItem>();
86 : }
87 :
88 : /**
89 : * @brief Reads @a expected_char from @a input and discards it.
90 : */
91 31 : void Decoder::readExpectedChar(std::istream &input, char expected_char) const {
92 31 : int c = input.get();
93 31 : if (c != expected_char) {
94 0 : throw DecodingError(std::string("expected '") + expected_char +
95 0 : "', got '" + static_cast<char>(c) + "'");
96 : }
97 31 : }
98 :
99 : /**
100 : * @brief Decodes a dictionary from @a input.
101 : *
102 : * @par Format
103 : * @code
104 : * d<bencoded string><bencoded element>e
105 : * @endcode
106 : *
107 : * @par Example
108 : * @code
109 : * d3:cow3:moo4:spam4:eggse represents the dictionary {"cow": "moo", "spam": "eggs"}
110 : * d4:spaml1:a1:bee represents the dictionary {"spam": ["a", "b"]}
111 : * @endcode
112 : *
113 : * The keys must be bencoded strings. The values may be any bencoded type,
114 : * including integers, strings, lists, and other dictionaries. This function
115 : * supports decoding of dictionaries whose keys are not lexicographically sorted
116 : * (according to the <a
117 : * href="https://wiki.theory.org/BitTorrentSpecification#Bencoding">specification</a>,
118 : * they must be sorted).
119 : */
120 6 : std::unique_ptr<BDictionary> Decoder::decodeDictionary(std::istream &input) {
121 6 : readExpectedChar(input, 'd');
122 6 : auto bDictionary = decodeDictionaryItemsIntoDictionary(input);
123 4 : readExpectedChar(input, 'e');
124 4 : return bDictionary;
125 : }
126 :
127 : /**
128 : * @brief Decodes items from @a input, adds them to a dictionary, and returns
129 : * that dictionary.
130 : */
131 6 : std::unique_ptr<BDictionary> Decoder::decodeDictionaryItemsIntoDictionary(
132 : std::istream &input) {
133 6 : auto bDictionary = BDictionary::create();
134 16 : while (input && input.peek() != 'e') {
135 12 : std::shared_ptr<BString> key(decodeDictionaryKey(input));
136 10 : std::shared_ptr<BItem> value(decodeDictionaryValue(input));
137 5 : (*bDictionary)[key] = value;
138 : }
139 4 : return bDictionary;
140 : }
141 :
142 : /**
143 : * @brief Decodes a dictionary key from @a input.
144 : */
145 7 : std::shared_ptr<BString> Decoder::decodeDictionaryKey(std::istream &input) {
146 13 : std::shared_ptr<BItem> key(decode(input));
147 : // A dictionary key has to be a string.
148 6 : std::shared_ptr<BString> keyAsBString(key->as<BString>());
149 6 : if (!keyAsBString) {
150 : throw DecodingError(
151 : "found a dictionary key that is not a bencoded string"
152 1 : );
153 : }
154 10 : return keyAsBString;
155 : }
156 :
157 : /**
158 : * @brief Decodes a dictionary value from @a input.
159 : */
160 5 : std::unique_ptr<BItem> Decoder::decodeDictionaryValue(std::istream &input) {
161 5 : return decode(input);
162 : }
163 :
164 : /**
165 : * @brief Decodes an integer from @a input.
166 : *
167 : * @par Format
168 : * @code
169 : * i<integer encoded in base ten ASCII>e
170 : * @endcode
171 : *
172 : * @par Example
173 : * @code
174 : * i3e represents the integer 3
175 : * @endcode
176 : *
177 : * Moreover, only the significant digits should be used, one cannot pad the
178 : * integer with zeroes, such as @c i04e (see the <a
179 : * href="https://wiki.theory.org/BitTorrentSpecification#Bencoding">
180 : * specification</a>).
181 : */
182 29 : std::unique_ptr<BInteger> Decoder::decodeInteger(std::istream &input) const {
183 29 : return decodeEncodedInteger(readEncodedInteger(input));
184 : }
185 :
186 : /**
187 : * @brief Reads an encoded integer from @a input.
188 : */
189 29 : std::string Decoder::readEncodedInteger(std::istream &input) const {
190 : // See the description of decodeInteger() for the format and example.
191 29 : std::string encodedInteger;
192 29 : bool encodedIntegerReadCorrectly = readUntil(input, encodedInteger, 'e');
193 29 : if (!encodedIntegerReadCorrectly) {
194 2 : throw DecodingError("error during the decoding of an integer near '" +
195 3 : encodedInteger + "'");
196 : }
197 :
198 28 : return encodedInteger;
199 : }
200 :
201 : /**
202 : * @brief Decodes the given encoded integer.
203 : */
204 28 : std::unique_ptr<BInteger> Decoder::decodeEncodedInteger(
205 : const std::string &encodedInteger) const {
206 : // See the description of decodeInteger() for the format and example.
207 56 : std::regex integerRegex("i([-+]?(0|[1-9][0-9]*))e");
208 56 : std::smatch match;
209 28 : bool valid = std::regex_match(encodedInteger, match, integerRegex);
210 28 : if (!valid) {
211 22 : throw DecodingError("encountered an encoded integer of invalid format: '" +
212 33 : encodedInteger + "'");
213 : }
214 :
215 : BInteger::ValueType integerValue;
216 17 : strToNum(match[1].str(), integerValue);
217 34 : return BInteger::create(integerValue);
218 : }
219 :
220 : /**
221 : * @brief Decodes a list from @a input.
222 : *
223 : * @par Format
224 : * @code
225 : * l<bencoded values>e
226 : * @endcode
227 : *
228 : * @par Example
229 : * @code
230 : * l4:spam4:eggse represents a list containing two strings "spam" and "eggs"
231 : * @endcode
232 : */
233 6 : std::unique_ptr<BList> Decoder::decodeList(std::istream &input) {
234 6 : readExpectedChar(input, 'l');
235 6 : auto bList = decodeListItemsIntoList(input);
236 3 : readExpectedChar(input, 'e');
237 3 : return bList;
238 : }
239 :
240 : /**
241 : * @brief Decodes items from @a input, appends them to a list, and returns that
242 : * list.
243 : */
244 6 : std::unique_ptr<BList> Decoder::decodeListItemsIntoList(std::istream &input) {
245 6 : auto bList = BList::create();
246 16 : while (input && input.peek() != 'e') {
247 8 : bList->push_back(decode(input));
248 : }
249 3 : return bList;
250 : }
251 :
252 : /**
253 : * @brief Decodes a string from @a input.
254 : *
255 : * @par Format
256 : * @code
257 : * <string length encoded in base ten ASCII>:<string data>
258 : * @endcode
259 : *
260 : * @par Example
261 : * @code
262 : * 4:test represents the string "test"
263 : * @endcode
264 : */
265 13 : std::unique_ptr<BString> Decoder::decodeString(std::istream &input) const {
266 13 : std::string::size_type stringLength(readStringLength(input));
267 12 : readExpectedChar(input, ':');
268 23 : std::string str(readStringOfGivenLength(input, stringLength));
269 22 : return BString::create(str);
270 : }
271 :
272 : /**
273 : * @brief Reads the string length from @a input, validates it, and returns it.
274 : */
275 13 : std::string::size_type Decoder::readStringLength(std::istream &input) const {
276 26 : std::string stringLengthInASCII;
277 13 : bool stringLengthInASCIIReadCorrectly = readUpTo(input, stringLengthInASCII, ':');
278 13 : if (!stringLengthInASCIIReadCorrectly) {
279 2 : throw DecodingError("error during the decoding of a string near '" +
280 3 : stringLengthInASCII + "'");
281 : }
282 :
283 : std::string::size_type stringLength;
284 12 : bool stringLengthIsValid = strToNum(stringLengthInASCII, stringLength);
285 12 : if (!stringLengthIsValid) {
286 0 : throw DecodingError("invalid string length: '" + stringLengthInASCII + "'");
287 : }
288 :
289 24 : return stringLength;
290 : }
291 :
292 : /**
293 : * @brief Reads a string of the given @a length from @a input and returns it.
294 : */
295 12 : std::string Decoder::readStringOfGivenLength(std::istream &input,
296 : std::string::size_type length) const {
297 12 : std::string str(length, char());
298 12 : input.read(&str[0], length);
299 12 : std::string::size_type numOfReadChars(input.gcount());
300 12 : if (numOfReadChars != length) {
301 2 : throw DecodingError("expected a string containing " + std::to_string(length) +
302 4 : " characters, but read only " + std::to_string(numOfReadChars) +
303 3 : " characters");
304 : }
305 11 : return str;
306 : }
307 :
308 : /**
309 : * @brief Throws DecodingError if @a input has not been completely read.
310 : */
311 16 : void Decoder::validateInputDoesNotContainUndecodedCharacters(std::istream &input) {
312 16 : if (input.peek() != std::char_traits<char>::eof()) {
313 1 : throw DecodingError("input contains undecoded characters");
314 : }
315 15 : }
316 :
317 : /**
318 : * @brief Decodes the given bencoded @a data and returns them.
319 : *
320 : * This function can be handy if you just want to decode bencoded data without
321 : * explicitly creating a decoder and calling @c decode() on it.
322 : *
323 : * See Decoder::decode() for more details.
324 : */
325 1 : std::unique_ptr<BItem> decode(const std::string &data) {
326 2 : auto decoder = Decoder::create();
327 2 : return decoder->decode(data);
328 : }
329 :
330 : /**
331 : * @brief Reads all the data from the given @a input, decodes them and returns
332 : * them.
333 : *
334 : * This function can be handy if you just want to decode bencoded data without
335 : * explicitly creating a decoder and calling @c decode() on it.
336 : *
337 : * See Decoder::decode() for more details.
338 : */
339 1 : std::unique_ptr<BItem> decode(std::istream &input) {
340 2 : auto decoder = Decoder::create();
341 2 : return decoder->decode(input);
342 : }
343 :
344 : } // namespace bencoding
|