LCOV - code coverage report
Current view: top level - src/ar/internal - extractor.cpp (source / functions) Hit Total Coverage
Test: ar-cpp code coverage Lines: 158 163 96.9 %
Date: 2017-12-27 13:15:38 Functions: 34 35 97.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : ///
       2             : /// @file      ar/internal/extractor.cpp
       3             : /// @copyright (c) 2015 by Petr Zemek (s3rvac@gmail.com) and contributors
       4             : /// @license   MIT, see the @c LICENSE file for more details
       5             : /// @brief     Implementation of the extractor of files from archives.
       6             : ///
       7             : 
       8             : #include <cctype>
       9             : 
      10             : #include "ar/exceptions.h"
      11             : #include "ar/file.h"
      12             : #include "ar/internal/extractor.h"
      13             : #include "ar/internal/files/string_file.h"
      14             : 
      15             : using namespace std::literals::string_literals;
      16             : 
      17             : namespace ar {
      18             : namespace internal {
      19             : 
      20             : namespace {
      21             : 
      22           1 : const auto MagicString = "!<arch>\n"s;
      23           1 : const auto FileHeaderEnd = "`\n"s;
      24             : 
      25             : } // anonymous namespace
      26             : 
      27          20 : Extractor::Extractor():
      28          20 :     content(), i(0) {}
      29             : 
      30             : Extractor::~Extractor() = default;
      31             : 
      32             : ///
      33             : /// Extracts files from the given archive content.
      34             : ///
      35             : /// @throws InvalidArchiveError when the archive is invalid.
      36             : ///
      37          20 : Files Extractor::extract(const std::string& archiveContent) {
      38          20 :     initializeWith(archiveContent);
      39          20 :     readMagicString();
      40          18 :     readLookupTable();
      41          18 :     readFileNameTable();
      42          13 :     auto files = readFiles();
      43           6 :     return files;
      44             : }
      45             : 
      46          20 : void Extractor::initializeWith(const std::string& archiveContent) {
      47          20 :     content = archiveContent;
      48          20 :     i = 0;
      49          20 :     fileNameTable.clear();
      50          20 : }
      51             : 
      52          20 : void Extractor::readMagicString() {
      53             :     // The magic string should appear at the beginning of every archive.
      54          20 :     if (content.substr(i, MagicString.size()) != MagicString) {
      55           2 :         throw InvalidArchiveError{"missing magic string"};
      56             :     }
      57          18 :     i += MagicString.size();
      58          18 : }
      59             : 
      60          18 : void Extractor::readLookupTable() {
      61             :     // In the GNU format, the special file name '/' denotes a lookup table.
      62             :     // However, we need to ensure that it is just a standalone '/' because "//"
      63             :     // denotes the start of a filename table.
      64          18 :     if (hasLookupTableAt(i)) {
      65             :         // The lookup table has the same format as a file. However, as we do
      66             :         // not need it, throw it away after reading (i.e. do not store its
      67             :         // content).
      68           2 :         ++i;
      69           2 :         readFileTimestamp();
      70           2 :         readFileOwnerId();
      71           2 :         readFileGroupId();
      72           2 :         readFileMode();
      73           2 :         auto fileSize = readFileSize();
      74           2 :         readUntilEndOfFileHeader();
      75           2 :         readFileContent(fileSize);
      76             :     }
      77          18 : }
      78             : 
      79          18 : bool Extractor::hasLookupTableAt(std::size_t i) const {
      80          18 :     return isValid(i) && content[i] == '/' && content.substr(i, 2) != "//";
      81             : }
      82             : 
      83          18 : void Extractor::readFileNameTable() {
      84             :     // In the GNU format, the special file name "//" denotes a filename table.
      85             :     // It contains names of files, one by line, that are referenced by
      86             :     // subsequent file headers. It is used to store file names that are longer
      87             :     // than 16 chars.
      88             :     //
      89             :     // Example:
      90             :     //
      91             :     //   !<arch>\n
      92             :     //   //                                              42        `\n
      93             :     //   very_long_name_of_a_module_in_archive.o/\n
      94             :     //   \n
      95             :     //   /0              0           0     0     644     22        `\n
      96             :     //   contents of the module
      97             :     //
      98             :     // The references are of the form "/X", where X is the index into the
      99             :     // filename table.
     100          18 :     if (content.substr(i, 2) == "//") {
     101           8 :         i += 2;
     102          10 :         const auto tableSize = readNumber("filename table size");
     103           6 :         readUntilEndOfFileHeader();
     104           5 :         const auto tableStart = i;
     105           5 :         const auto tableEnd = i + tableSize;
     106          11 :         while (i < tableEnd) {
     107           5 :             readFileNameIntoFileNameTable(tableStart);
     108             :         }
     109             :     }
     110          13 : }
     111             : 
     112           5 : void Extractor::readFileNameIntoFileNameTable(std::size_t tableStart) {
     113             :     // A row in the filename table in the GNU variant is of the form
     114             :     //
     115             :     //   module.o/
     116             :     //
     117           5 :     const auto tableIndex = i - tableStart;
     118           8 :     auto fileName = readFileNameEndedWithSlash();
     119           3 :     fileNameTable.emplace(tableIndex, std::move(fileName));
     120             : 
     121             :     // Skip separators/padding.
     122           3 :     skipEndsOfLines();
     123           3 : }
     124             : 
     125          13 : Files Extractor::readFiles() {
     126          13 :     Files files;
     127          21 :     while (i < content.size()) {
     128          11 :         files.push_back(readFile());
     129             :     }
     130           6 :     return files;
     131             : }
     132             : 
     133          11 : std::unique_ptr<File> Extractor::readFile() {
     134          18 :     auto fileName = readFileName();
     135           7 :     readFileTimestamp();
     136           7 :     readFileOwnerId();
     137           7 :     readFileGroupId();
     138           7 :     readFileMode();
     139           7 :     auto fileSize = readFileSize();
     140           6 :     readUntilEndOfFileHeader();
     141           9 :     auto fileContent = readFileContent(fileSize);
     142             : 
     143           8 :     return std::make_unique<StringFile>(fileContent, fileName);
     144             : }
     145             : 
     146          11 : std::string Extractor::readFileName() {
     147             :     // In the GNU variant, the name of the file can be either an index into the
     148             :     // filename table:
     149             :     //
     150             :     //   /X
     151             :     //
     152             :     // or a slash-ended name:
     153             :     //
     154             :     //   module.o/
     155             :     //
     156          11 :     if (hasNameSpecifiedViaIndexIntoFileNameTableAt(i)) {
     157           2 :         ++i;
     158           2 :         const auto index = readNumber("index into filename table");
     159           2 :         return nameFromFileNameTableOnIndex(index);
     160             :     } else {
     161           9 :         return readFileNameEndedWithSlash();
     162             :     }
     163             : }
     164             : 
     165          11 : bool Extractor::hasNameSpecifiedViaIndexIntoFileNameTableAt(std::size_t j) const {
     166             :     // The index specification has to be of the form
     167             :     //
     168             :     //   /X
     169             :     //
     170             :     // where X is a number (the index).
     171          11 :     return isValid(j + 1) && content[j] == '/' && std::isdigit(content[j + 1]);
     172             : }
     173             : 
     174          14 : std::string Extractor::readFileNameEndedWithSlash() {
     175          14 :     auto pos = content.find('/', i);
     176          14 :     ensureContainsSlashOnPosition(pos);
     177          12 :     auto fileName = content.substr(i, pos - i);
     178          12 :     ensureFileNameIsNonEmpty(fileName);
     179           9 :     i = pos + 1;
     180           9 :     return fileName;
     181             : }
     182             : 
     183           2 : std::string Extractor::nameFromFileNameTableOnIndex(std::size_t index) const {
     184           2 :     auto it = fileNameTable.find(index);
     185           2 :     ensureIsValidFileNameTableIndex(it, index);
     186           1 :     return it->second;
     187             : }
     188             : 
     189           9 : void Extractor::readFileTimestamp() {
     190             :     // Currently unused.
     191           9 :     readNumber("timestamp");
     192           9 : }
     193             : 
     194           9 : void Extractor::readFileOwnerId() {
     195             :     // Currently unused.
     196           9 :     readNumber("file owner ID");
     197           9 : }
     198             : 
     199           9 : void Extractor::readFileGroupId() {
     200             :     // Currently unused.
     201           9 :     readNumber("file group ID");
     202           9 : }
     203             : 
     204           9 : void Extractor::readFileMode() {
     205             :     // Currently unused.
     206           9 :     readNumber("file mode");
     207           9 : }
     208             : 
     209           9 : std::size_t Extractor::readFileSize() {
     210           9 :     return readNumber("file size");
     211             : }
     212             : 
     213          14 : void Extractor::readUntilEndOfFileHeader() {
     214          14 :     auto pos = content.find(FileHeaderEnd, i);
     215          14 :     ensureContainsFileHeaderOnPosition(pos);
     216          12 :     i = pos + FileHeaderEnd.size();
     217          12 : }
     218             : 
     219           7 : std::string Extractor::readFileContent(std::size_t fileSize) {
     220           7 :     auto fileContent = content.substr(i, fileSize);
     221           7 :     ensureContentOfGivenSizeWasRead(fileContent.size(), fileSize);
     222           6 :     i += fileSize;
     223           6 :     return fileContent;
     224             : }
     225             : 
     226         871 : bool Extractor::isValid(std::size_t j) const noexcept {
     227         871 :     return j < content.size();
     228             : }
     229             : 
     230           0 : std::string::value_type Extractor::charAt(std::size_t j) const {
     231           0 :     if (!isValid(j)) {
     232             :         throw InvalidArchiveError{
     233           0 :             "premature end of archive at byte " + std::to_string(j)
     234           0 :         };
     235             :     }
     236           0 :     return content[j];
     237             : }
     238             : 
     239          55 : void Extractor::skipSpaces() {
     240          55 :     skipSuccessiveChars(' ');
     241          55 : }
     242             : 
     243           3 : void Extractor::skipEndsOfLines() {
     244           3 :     skipSuccessiveChars('\n');
     245           3 : }
     246             : 
     247         705 : void Extractor::skipSuccessiveChars(char c) {
     248        1352 :     while (isValid(i) && content[i] == c) {
     249         647 :         ++i;
     250             :     }
     251          58 : }
     252             : 
     253          55 : std::size_t Extractor::readNumber(const std::string& name) {
     254          55 :     skipSpaces();
     255             : 
     256         110 :     std::string numAsStr;
     257         219 :     while (isValid(i) && std::isdigit(content[i])) {
     258          82 :         numAsStr += content[i];
     259          82 :         ++i;
     260             :     }
     261          55 :     ensureNumberWasRead(numAsStr, name);
     262         104 :     return std::stoull(numAsStr);
     263             : }
     264             : 
     265           2 : void Extractor::ensureIsValidFileNameTableIndex(FileNameTable::const_iterator it,
     266             :         std::size_t index) const {
     267           2 :     if (it == fileNameTable.end()) {
     268             :         throw InvalidArchiveError{
     269           2 :             "invalid index into filename table: " + std::to_string(index)
     270           3 :         };
     271             :     }
     272           1 : }
     273             : 
     274          12 : void Extractor::ensureFileNameIsNonEmpty(const std::string& fileName) const {
     275          12 :     if (fileName.empty()) {
     276           3 :         throw InvalidArchiveError{"file has an empty name"};
     277             :     }
     278           9 : }
     279             : 
     280          14 : void Extractor::ensureContainsSlashOnPosition(
     281             :         std::string::size_type pos) const {
     282          14 :     if (pos == std::string::npos) {
     283           2 :         throw InvalidArchiveError{"missing '/' after file name"};
     284             :     }
     285          12 : }
     286             : 
     287          14 : void Extractor::ensureContainsFileHeaderOnPosition(
     288             :         std::string::size_type pos) const {
     289          14 :     if (pos == std::string::npos) {
     290           2 :         throw InvalidArchiveError{"missing end of file header"};
     291             :     }
     292          12 : }
     293             : 
     294           7 : void Extractor::ensureContentOfGivenSizeWasRead(std::size_t readContentSize,
     295             :         std::size_t expectedContentSize) const {
     296           7 :     if (readContentSize != expectedContentSize) {
     297             :         throw InvalidArchiveError{
     298           2 :             "premature end of file (expected " +
     299           4 :             std::to_string(expectedContentSize) +
     300           2 :             " bytes, read " +
     301           4 :             std::to_string(readContentSize) +
     302             :             " bytes)"
     303           3 :         };
     304             :     }
     305           6 : }
     306             : 
     307          55 : void Extractor::ensureNumberWasRead(const std::string& numAsStr,
     308             :         const std::string& name) const {
     309          55 :     if (numAsStr.empty()) {
     310           3 :         throw InvalidArchiveError{"missing number (" + name + ")"};
     311             :     }
     312          52 : }
     313             : 
     314             : } // namespace internal
     315           3 : } // namespace ar

Generated by: LCOV version 1.13