tarfile.cc

Go to the documentation of this file.
00001 ///
00002 /// \file       tarfile.cc
00003 ///             API for reading and writing sequentially from compressed
00004 ///             tar files.
00005 
00006 /*
00007     Copyright (C) 2007-2012, Chris Frey <cdfrey@foursquare.net>
00008 
00009     This program is free software; you can redistribute it and/or modify
00010     it under the terms of the GNU General Public License as published by
00011     the Free Software Foundation; either version 2 of the License, or
00012     (at your option) any later version.
00013 
00014     This program is distributed in the hope that it will be useful,
00015     but WITHOUT ANY WARRANTY; without even the implied warranty of
00016     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00017 
00018     See the GNU General Public License in the COPYING file at the
00019     root directory of this project for more details.
00020 */
00021 
00022 #include "tarfile.h"
00023 #include "data.h"
00024 
00025 #include <fcntl.h>
00026 #include <errno.h>
00027 #include <string.h>
00028 #include <stdlib.h>
00029 
00030 namespace reuse {
00031 
00032 TarFile::TarFile(const char *filename,
00033                  bool create,
00034                  tartype_t *compress_ops,
00035                  bool always_throw)
00036         : m_tar(0),
00037         m_throw(always_throw),
00038         m_writemode(create)
00039 {
00040         // figure out how to handle the file flags/modes
00041         int flags = 0;
00042         mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
00043 
00044         if( m_writemode ) {
00045                 flags = O_WRONLY | O_CREAT | O_EXCL;
00046         }
00047         else {
00048                 flags = O_RDONLY;
00049         }
00050 
00051         // open... throw on error, as we are in the constructor
00052         if( tar_open(&m_tar, const_cast<char*>(filename),
00053                 compress_ops, flags, mode, TAR_VERBOSE | TAR_GNU) == -1 ) {
00054                 throw TarError(std::string("Unable to open tar file: ") + strerror(errno));
00055         }
00056 }
00057 
00058 TarFile::~TarFile()
00059 {
00060         try {
00061                 Close();
00062         } catch( TarError &te ) {}
00063 }
00064 
00065 bool TarFile::False(const char *msg)
00066 {
00067         m_last_error = msg;
00068         if( m_throw )
00069                 throw TarError(msg);
00070         else
00071                 return false;
00072 }
00073 
00074 bool TarFile::False(const std::string &msg, int err)
00075 {
00076         std::string str = msg;
00077         str += ": ";
00078         str += strerror(err);
00079         return False(str);
00080 }
00081 
00082 bool TarFile::Close()
00083 {
00084         if( m_tar ) {
00085                 if( m_writemode ) {
00086                         if( tar_append_eof(m_tar) != 0 )
00087                                 return False("Unable to write eof", errno);
00088                 }
00089 
00090                 if( tar_close(m_tar) != 0 ) {
00091                         return False("Unable to close file", errno);
00092                 }
00093                 m_tar = 0;
00094         }
00095         return true;
00096 }
00097 
00098 /// Appends a new file to the current tarfile, using tarpath as
00099 /// its internal filename, and data as the complete file contents.
00100 /// Uses current date and time as file mtime.
00101 bool TarFile::AppendFile(const char *tarpath, const std::string &data)
00102 {
00103         // write standard file header
00104         th_set_type(m_tar, REGTYPE);
00105         th_set_mode(m_tar, 0644);
00106         th_set_path(m_tar, const_cast<char*>(tarpath));
00107         th_set_user(m_tar, 0);
00108         th_set_group(m_tar, 0);
00109         th_set_size(m_tar, data.size());
00110         th_set_mtime(m_tar, time(NULL));
00111         if( th_write(m_tar) != 0 ) {
00112                 return False("Unable to write tar header", errno);
00113         }
00114 
00115         // write the data in blocks until finished
00116         char block[T_BLOCKSIZE];
00117         for( size_t pos = 0; pos < data.size(); pos += T_BLOCKSIZE ) {
00118                 memset(block, 0, T_BLOCKSIZE);
00119 
00120                 size_t size = T_BLOCKSIZE;
00121                 if( data.size() - pos < T_BLOCKSIZE )
00122                         size = data.size() - pos;
00123 
00124                 memcpy(block, data.data() + pos, size);
00125 
00126                 if( tar_block_write(m_tar, block) != T_BLOCKSIZE ) {
00127                         return False("Unable to write block", errno);
00128                 }
00129         }
00130 
00131         return true;
00132 }
00133 
00134 /// Reads next available file into data, filling tarpath with
00135 /// internal filename from tarball.
00136 bool TarFile::ReadNextFile(std::string &tarpath, std::string &data)
00137 {
00138         // start fresh
00139         tarpath.clear();
00140         data.clear();
00141 
00142         // read next tar file header... skip all directories
00143         do {
00144                 if( th_read(m_tar) != 0 ) {
00145                         // this is not necessarily an error, as it could just
00146                         // be the end of file, so a simple false is good here,
00147                         // don't throw an exception
00148                         m_last_error = "";
00149                         return false;
00150                 }
00151         } while( TH_ISDIR(m_tar) );
00152 
00153         // write standard file header
00154         if( !TH_ISREG(m_tar) ) {
00155                 return False("Only regular files are supported inside a tarball.");
00156         }
00157 
00158         char *pathname = th_get_pathname(m_tar);
00159         tarpath = pathname;
00160         //
00161         // FIXME (leak) - someday, when all distros use a patched version of
00162         // libtar, we may be able to avoid this memory leak, but
00163         // th_get_pathname() does not consistently return a user-freeable
00164         // string on all distros.
00165         //
00166         // See the following links for more information:
00167         //   https://bugs.launchpad.net/ubuntu/+source/libtar/+bug/41804
00168         //   https://lists.feep.net:8080/pipermail/libtar/2006-April/000222.html
00169         //
00170 //      free(pathname);
00171         size_t size = th_get_size(m_tar);
00172 
00173         // read the data in blocks until finished
00174         char block[T_BLOCKSIZE];
00175         for( size_t pos = 0; pos < size; pos += T_BLOCKSIZE ) {
00176                 memset(block, 0, T_BLOCKSIZE);
00177 
00178                 size_t readsize = T_BLOCKSIZE;
00179                 if( size - pos < T_BLOCKSIZE )
00180                         readsize = size - pos;
00181 
00182                 if( tar_block_read(m_tar, block) != T_BLOCKSIZE ) {
00183                         return False("Unable to read block", errno);
00184                 }
00185 
00186                 data.append(block, readsize);
00187         }
00188 
00189         return true;
00190 }
00191 
00192 // FIXME - yes, this is blatant copying of code, but this is
00193 // specific to Barry, to use a Barry::Data object instead of std::string
00194 // in order to reduce copies.
00195 bool TarFile::ReadNextFile(std::string &tarpath, Barry::Data &data)
00196 {
00197         // start fresh
00198         tarpath.clear();
00199         data.QuickZap();
00200 
00201         // read next tar file header... skip all directories
00202         do {
00203                 if( th_read(m_tar) != 0 ) {
00204                         // this is not necessarily an error, as it could just
00205                         // be the end of file, so a simple false is good here,
00206                         // don't throw an exception
00207                         m_last_error = "";
00208                         return false;
00209                 }
00210         } while( TH_ISDIR(m_tar) );
00211 
00212         // write standard file header
00213         if( !TH_ISREG(m_tar) ) {
00214                 return False("Only regular files are supported inside a tarball.");
00215         }
00216 
00217         char *pathname = th_get_pathname(m_tar);
00218         tarpath = pathname;
00219         //
00220         // FIXME (leak) - someday, when all distros use a patched version of
00221         // libtar, we may be able to avoid this memory leak, but
00222         // th_get_pathname() does not consistently return a user-freeable
00223         // string on all distros.
00224         //
00225         // See the following links for more information:
00226         //   https://bugs.launchpad.net/ubuntu/+source/libtar/+bug/41804
00227         //   https://lists.feep.net:8080/pipermail/libtar/2006-April/000222.html
00228         //
00229 //      free(pathname);
00230         size_t size = th_get_size(m_tar);
00231 
00232         // read the data in blocks until finished
00233         char block[T_BLOCKSIZE];
00234         for( size_t pos = 0; pos < size; pos += T_BLOCKSIZE ) {
00235                 memset(block, 0, T_BLOCKSIZE);
00236 
00237                 size_t readsize = T_BLOCKSIZE;
00238                 if( size - pos < T_BLOCKSIZE )
00239                         readsize = size - pos;
00240 
00241                 if( tar_block_read(m_tar, block) != T_BLOCKSIZE ) {
00242                         return False("Unable to read block", errno);
00243                 }
00244 
00245                 data.Append(block, readsize);
00246         }
00247 
00248         return true;
00249 }
00250 
00251 /// Read next available filename, skipping the data if it is
00252 /// a regular file
00253 bool TarFile::ReadNextFilenameOnly(std::string &tarpath)
00254 {
00255         // start fresh
00256         tarpath.clear();
00257 
00258         // read next tar file header... skip all directories
00259         do {
00260                 if( th_read(m_tar) != 0 ) {
00261                         // this is not necessarily an error, as it could just
00262                         // be the end of file, so a simple false is good here,
00263                         // don't throw an exception
00264                         m_last_error = "";
00265                         return false;
00266                 }
00267         } while( TH_ISDIR(m_tar) );
00268 
00269         // write standard file header
00270         if( !TH_ISREG(m_tar) ) {
00271                 return False("Only regular files are supported inside a tarball.");
00272         }
00273 
00274         char *pathname = th_get_pathname(m_tar);
00275         tarpath = pathname;
00276         // See above FIXME (leak) comment
00277 //      free(pathname);
00278 
00279         if( tar_skip_regfile(m_tar) != 0 ) {
00280                 return False("Unable to skip tar file", errno);
00281         }
00282 
00283         return true;
00284 }
00285 
00286 
00287 } // namespace reuse
00288 
00289 
00290 #ifdef __TEST_MODE__
00291 
00292 #include <iostream>
00293 #include <unistd.h>
00294 
00295 using namespace std;
00296 
00297 int main()
00298 {
00299         try {
00300                 cout << "Writing test file..." << endl;
00301                 reuse::TarFile output("tartest.tar.gz", true, true, true);
00302                 std::string data;
00303                 for( int i = 0; i < 60; i++ ) {
00304                         data.append("0123456789", 10);
00305                 }
00306 
00307                 output.AppendFile("path1/test1.txt", data);
00308                 output.AppendFile("path2/test2.txt", data);
00309                 output.Close();
00310 
00311 
00312                 cout << "Reading test file..." << endl;
00313                 reuse::TarFile input("tartest.tar.gz", false, true, true);
00314                 std::string path, incoming;
00315 
00316                 while( input.ReadNextFile(path, incoming) ) {
00317                         cout << "Read: " << path
00318                              << " Data: "
00319                              << (( data == incoming ) ? "equal" : "different")
00320                              << endl;
00321                 }
00322 
00323                 input.Close();
00324 
00325                 unlink("tartest.tar.gz");
00326 
00327         } catch( reuse::TarFile::TarError &te ) {
00328                 cerr << te.what() << endl;
00329                 return 1;
00330         }
00331 }
00332 
00333 #endif
00334