Line data Source code
1 : ///
2 : /// @file ar/internal/extractor.cpp
3 : /// @copyright (c) 2015 by Petr Zemek (s3rvac@gmail.com) and contributors
4 : /// @license MIT, see the @c LICENSE file for more details
5 : /// @brief Implementation of the extractor of files from archives.
6 : ///
7 :
#include <cctype>
#include <stdexcept>

#include "ar/exceptions.h"
#include "ar/file.h"
#include "ar/internal/extractor.h"
#include "ar/internal/files/string_file.h"
14 :
15 : using namespace std::literals::string_literals;
16 :
17 : namespace ar {
18 : namespace internal {
19 :
namespace {

/// Sequence of characters that has to appear at the very beginning of an
/// archive.
const std::string MagicString("!<arch>\n");

/// Sequence of characters that terminates a file header.
const std::string FileHeaderEnd("`\n");

} // anonymous namespace
26 :
27 20 : Extractor::Extractor():
28 20 : content(), i(0) {}
29 :
30 : Extractor::~Extractor() = default;
31 :
32 : ///
33 : /// Extracts files from the given archive content.
34 : ///
35 : /// @throws InvalidArchiveError when the archive is invalid.
36 : ///
37 20 : Files Extractor::extract(const std::string& archiveContent) {
38 20 : initializeWith(archiveContent);
39 20 : readMagicString();
40 18 : readLookupTable();
41 18 : readFileNameTable();
42 13 : auto files = readFiles();
43 6 : return files;
44 : }
45 :
46 20 : void Extractor::initializeWith(const std::string& archiveContent) {
47 20 : content = archiveContent;
48 20 : i = 0;
49 20 : fileNameTable.clear();
50 20 : }
51 :
52 20 : void Extractor::readMagicString() {
53 : // The magic string should appear at the beginning of every archive.
54 20 : if (content.substr(i, MagicString.size()) != MagicString) {
55 2 : throw InvalidArchiveError{"missing magic string"};
56 : }
57 18 : i += MagicString.size();
58 18 : }
59 :
60 18 : void Extractor::readLookupTable() {
61 : // In the GNU format, the special file name '/' denotes a lookup table.
62 : // However, we need to ensure that it is just a standalone '/' because "//"
63 : // denotes the start of a filename table.
64 18 : if (hasLookupTableAt(i)) {
65 : // The lookup table has the same format as a file. However, as we do
66 : // not need it, throw it away after reading (i.e. do not store its
67 : // content).
68 2 : ++i;
69 2 : readFileTimestamp();
70 2 : readFileOwnerId();
71 2 : readFileGroupId();
72 2 : readFileMode();
73 2 : auto fileSize = readFileSize();
74 2 : readUntilEndOfFileHeader();
75 2 : readFileContent(fileSize);
76 : }
77 18 : }
78 :
79 18 : bool Extractor::hasLookupTableAt(std::size_t i) const {
80 18 : return isValid(i) && content[i] == '/' && content.substr(i, 2) != "//";
81 : }
82 :
83 18 : void Extractor::readFileNameTable() {
84 : // In the GNU format, the special file name "//" denotes a filename table.
85 : // It contains names of files, one by line, that are referenced by
86 : // subsequent file headers. It is used to store file names that are longer
87 : // than 16 chars.
88 : //
89 : // Example:
90 : //
91 : // !<arch>\n
92 : // // 42 `\n
93 : // very_long_name_of_a_module_in_archive.o/\n
94 : // \n
95 : // /0 0 0 0 644 22 `\n
96 : // contents of the module
97 : //
98 : // The references are of the form "/X", where X is the index into the
99 : // filename table.
100 18 : if (content.substr(i, 2) == "//") {
101 8 : i += 2;
102 10 : const auto tableSize = readNumber("filename table size");
103 6 : readUntilEndOfFileHeader();
104 5 : const auto tableStart = i;
105 5 : const auto tableEnd = i + tableSize;
106 11 : while (i < tableEnd) {
107 5 : readFileNameIntoFileNameTable(tableStart);
108 : }
109 : }
110 13 : }
111 :
112 5 : void Extractor::readFileNameIntoFileNameTable(std::size_t tableStart) {
113 : // A row in the filename table in the GNU variant is of the form
114 : //
115 : // module.o/
116 : //
117 5 : const auto tableIndex = i - tableStart;
118 8 : auto fileName = readFileNameEndedWithSlash();
119 3 : fileNameTable.emplace(tableIndex, std::move(fileName));
120 :
121 : // Skip separators/padding.
122 3 : skipEndsOfLines();
123 3 : }
124 :
125 13 : Files Extractor::readFiles() {
126 13 : Files files;
127 21 : while (i < content.size()) {
128 11 : files.push_back(readFile());
129 : }
130 6 : return files;
131 : }
132 :
133 11 : std::unique_ptr<File> Extractor::readFile() {
134 18 : auto fileName = readFileName();
135 7 : readFileTimestamp();
136 7 : readFileOwnerId();
137 7 : readFileGroupId();
138 7 : readFileMode();
139 7 : auto fileSize = readFileSize();
140 6 : readUntilEndOfFileHeader();
141 9 : auto fileContent = readFileContent(fileSize);
142 :
143 8 : return std::make_unique<StringFile>(fileContent, fileName);
144 : }
145 :
146 11 : std::string Extractor::readFileName() {
147 : // In the GNU variant, the name of the file can be either an index into the
148 : // filename table:
149 : //
150 : // /X
151 : //
152 : // or a slash-ended name:
153 : //
154 : // module.o/
155 : //
156 11 : if (hasNameSpecifiedViaIndexIntoFileNameTableAt(i)) {
157 2 : ++i;
158 2 : const auto index = readNumber("index into filename table");
159 2 : return nameFromFileNameTableOnIndex(index);
160 : } else {
161 9 : return readFileNameEndedWithSlash();
162 : }
163 : }
164 :
165 11 : bool Extractor::hasNameSpecifiedViaIndexIntoFileNameTableAt(std::size_t j) const {
166 : // The index specification has to be of the form
167 : //
168 : // /X
169 : //
170 : // where X is a number (the index).
171 11 : return isValid(j + 1) && content[j] == '/' && std::isdigit(content[j + 1]);
172 : }
173 :
174 14 : std::string Extractor::readFileNameEndedWithSlash() {
175 14 : auto pos = content.find('/', i);
176 14 : ensureContainsSlashOnPosition(pos);
177 12 : auto fileName = content.substr(i, pos - i);
178 12 : ensureFileNameIsNonEmpty(fileName);
179 9 : i = pos + 1;
180 9 : return fileName;
181 : }
182 :
183 2 : std::string Extractor::nameFromFileNameTableOnIndex(std::size_t index) const {
184 2 : auto it = fileNameTable.find(index);
185 2 : ensureIsValidFileNameTableIndex(it, index);
186 1 : return it->second;
187 : }
188 :
189 9 : void Extractor::readFileTimestamp() {
190 : // Currently unused.
191 9 : readNumber("timestamp");
192 9 : }
193 :
194 9 : void Extractor::readFileOwnerId() {
195 : // Currently unused.
196 9 : readNumber("file owner ID");
197 9 : }
198 :
199 9 : void Extractor::readFileGroupId() {
200 : // Currently unused.
201 9 : readNumber("file group ID");
202 9 : }
203 :
204 9 : void Extractor::readFileMode() {
205 : // Currently unused.
206 9 : readNumber("file mode");
207 9 : }
208 :
209 9 : std::size_t Extractor::readFileSize() {
210 9 : return readNumber("file size");
211 : }
212 :
213 14 : void Extractor::readUntilEndOfFileHeader() {
214 14 : auto pos = content.find(FileHeaderEnd, i);
215 14 : ensureContainsFileHeaderOnPosition(pos);
216 12 : i = pos + FileHeaderEnd.size();
217 12 : }
218 :
219 7 : std::string Extractor::readFileContent(std::size_t fileSize) {
220 7 : auto fileContent = content.substr(i, fileSize);
221 7 : ensureContentOfGivenSizeWasRead(fileContent.size(), fileSize);
222 6 : i += fileSize;
223 6 : return fileContent;
224 : }
225 :
226 871 : bool Extractor::isValid(std::size_t j) const noexcept {
227 871 : return j < content.size();
228 : }
229 :
230 0 : std::string::value_type Extractor::charAt(std::size_t j) const {
231 0 : if (!isValid(j)) {
232 : throw InvalidArchiveError{
233 0 : "premature end of archive at byte " + std::to_string(j)
234 0 : };
235 : }
236 0 : return content[j];
237 : }
238 :
239 55 : void Extractor::skipSpaces() {
240 55 : skipSuccessiveChars(' ');
241 55 : }
242 :
243 3 : void Extractor::skipEndsOfLines() {
244 3 : skipSuccessiveChars('\n');
245 3 : }
246 :
247 705 : void Extractor::skipSuccessiveChars(char c) {
248 1352 : while (isValid(i) && content[i] == c) {
249 647 : ++i;
250 : }
251 58 : }
252 :
253 55 : std::size_t Extractor::readNumber(const std::string& name) {
254 55 : skipSpaces();
255 :
256 110 : std::string numAsStr;
257 219 : while (isValid(i) && std::isdigit(content[i])) {
258 82 : numAsStr += content[i];
259 82 : ++i;
260 : }
261 55 : ensureNumberWasRead(numAsStr, name);
262 104 : return std::stoull(numAsStr);
263 : }
264 :
265 2 : void Extractor::ensureIsValidFileNameTableIndex(FileNameTable::const_iterator it,
266 : std::size_t index) const {
267 2 : if (it == fileNameTable.end()) {
268 : throw InvalidArchiveError{
269 2 : "invalid index into filename table: " + std::to_string(index)
270 3 : };
271 : }
272 1 : }
273 :
274 12 : void Extractor::ensureFileNameIsNonEmpty(const std::string& fileName) const {
275 12 : if (fileName.empty()) {
276 3 : throw InvalidArchiveError{"file has an empty name"};
277 : }
278 9 : }
279 :
280 14 : void Extractor::ensureContainsSlashOnPosition(
281 : std::string::size_type pos) const {
282 14 : if (pos == std::string::npos) {
283 2 : throw InvalidArchiveError{"missing '/' after file name"};
284 : }
285 12 : }
286 :
287 14 : void Extractor::ensureContainsFileHeaderOnPosition(
288 : std::string::size_type pos) const {
289 14 : if (pos == std::string::npos) {
290 2 : throw InvalidArchiveError{"missing end of file header"};
291 : }
292 12 : }
293 :
294 7 : void Extractor::ensureContentOfGivenSizeWasRead(std::size_t readContentSize,
295 : std::size_t expectedContentSize) const {
296 7 : if (readContentSize != expectedContentSize) {
297 : throw InvalidArchiveError{
298 2 : "premature end of file (expected " +
299 4 : std::to_string(expectedContentSize) +
300 2 : " bytes, read " +
301 4 : std::to_string(readContentSize) +
302 : " bytes)"
303 3 : };
304 : }
305 6 : }
306 :
307 55 : void Extractor::ensureNumberWasRead(const std::string& numAsStr,
308 : const std::string& name) const {
309 55 : if (numAsStr.empty()) {
310 3 : throw InvalidArchiveError{"missing number (" + name + ")"};
311 : }
312 52 : }
313 :
314 : } // namespace internal
315 3 : } // namespace ar
|