1. This site uses cookies. By continuing to use this site, you are agreeing to our use of cookies. Learn More.

file manipulation

Discussion in 'C' started by mop65715, Apr 14, 2006.

  1. mop65715

    mop65715 New Member

    Joined:
    Apr 14, 2006
    Messages:
    2
    Likes Received:
    0
    Trophy Points:
    0
    I've got a binary file that contains header information followed by data.

    [ header ]
    [ data ] -> [ data 1 ] _first_half.bin
    [ ] -> [ data 2 ] _second_half.bin

    Per the pictorial above, the 'file' on the left is the actual file, with a 4K header and (assume) 1 mebibyte of data.
    The pictorial on the right is my desired result: I want to 'break' the data portion of the file on the left in two. This results in two files, called first_half.bin and second_half.bin. I'll also store the contents of first_half.bin and second_half.bin in vectors of characters - say vecFirstHalf and vecSecondHalf respectively.

    The source below shows my attempt at achieving my objective. For simulation purposes, I create a file and write a 4K header and 1 MiB of data to it. I then read that file back in, create vecFirstHalf and vecSecondHalf, and store their contents in outFileFirst and outFileSecond respectively.
    The issue: I'm unsure whether my approach is very efficient. For large amounts of data - say 168 MiB - performance is a drag. Ideas on how to improve this are welcome.

    Thanks in advance


    Code:
    #include <algorithm>
    #include <cstdlib>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <vector>
    
    using namespace std; 
    
    // Layout of the simulated file, in bytes. Typed constants replace the
    // original macros so the values obey scope and type checking.
    const unsigned long HEADER_SIZE = 4096;           // header that precedes the data
    const unsigned long CHUNK_SIZE  = 0x80000;        // size of the first half of the data
    const unsigned long DATA_SIZE   = 2 * CHUNK_SIZE; // minimum data expected after the header (0x100000)
    
    int main( )
    {
      vector<char> buf;
      vector<char> vecFirstHalf;
      vector<char> vecSecondHalf;
    
      clog << "Creating File..." << endl;
      ofstream outFile( "out.bin", ios::binary | ios::trunc );
      if( ! outFile.is_open( ) ) {
         cerr << "Cannot open out.bin for writing" << endl;
        return EXIT_FAILURE;
      }
    	
      // create header
      string s("test");
      buf.insert(buf.end(),s.begin(), s.end() );
      buf.resize(HEADER_SIZE, 'h');
    
      clog << "Header size: " << buf.size() << endl;
    	
      // write header to file
      copy( buf.begin(), buf.end(), ostream_iterator<char>(outFile) );
    	
      // create first data chunk
      buf.clear();
      buf.resize( CHUNK_SIZE, '1');
      copy( buf.begin(), buf.end(), ostream_iterator<char>(outFile) );
    
      clog << "First chunk size: " << buf.size() << endl;
    	
      // create second data chunk
      buf.clear();
      buf.resize( CHUNK_SIZE, '2');
      copy( buf.begin(), buf.end(), ostream_iterator<char>(outFile) );
    
      clog << "Second chunk size: " << buf.size() << endl;
    	
      // Tidy up	
      outFile.close();
      buf.clear();
    
      clog << "Processing Data..." << endl;
      // Now process the data
      ifstream inFile( "out.bin", ios::binary );
      if( ! inFile.is_open( ) ) {
        cerr << "Cannot open out.bin for reading" << endl;
        return EXIT_FAILURE;
      }
      inFile.unsetf( ios::skipws );
    
      // position myself past the header
      inFile.seekg( HEADER_SIZE, ios::beg ); 
     
       clog << "Performed seek to: " << inFile.tellg() << endl;
     	
      // read in file
      buf.assign(istream_iterator<char>(inFile), istream_iterator<char>() );
    
      clog << "Read bytes: " << buf.size() << endl;
    	
      // check for errors
      if( ! inFile.eof() ) {
        cerr << "Read error" << endl;
        return EXIT_FAILURE;
      }
      inFile.close();
      if ( buf.size() < DATA_SIZE ) {
        cerr << "File less than spec?" << endl;
        return EXIT_FAILURE;
      }
    
      // get middle iterator
      vector<char>::iterator bufiter = buf.begin();
      bufiter += CHUNK_SIZE;
    	
      // read first half of data
      vecFirstHalf.assign( buf.begin(), bufiter );
    
      clog << "Assigned to first half: " << vecFirstHalf.size() << endl;
    	
      // read second half
      vecSecondHalf.assign( bufiter, buf.end() );
    
      clog << "Assigned to second half: " << vecSecondHalf.size() << endl;
    
      clog << "Writing first half. " << endl;
    
      // Dump first half.
      ofstream outFirstHalf( "outFileFirst.bin", ios::binary | ios::trunc );
      if( ! outFirstHalf.is_open() ) {
         cerr << "Cannot open outFileFirst.bin for writing" << endl;
        return EXIT_FAILURE;
      }
      copy( vecFirstHalf.begin(), vecFirstHalf.end(), ostream_iterator<char>(outFirstHalf));
      outFirstHalf.close();
    
      clog << "Writing second half. " << endl;
    	
      // Dump second half
      ofstream outSecondHalf( "outFileSec.bin", ios::binary | ios::trunc );
      if( ! outSecondHalf.is_open() ) {
        cerr << "Cannot open outFileSec.bin for writing" << endl;
        return EXIT_FAILURE;
      }
      copy( vecSecondHalf.begin(), vecSecondHalf.end(), ostream_iterator<char>(outSecondHalf) );
      outSecondHalf.close();
    	
      // Pretty print the first half.
      // Also avoids control charachters being sent to terminal
      clog << "First half of data:" << endl;
      vector<char>::const_iterator iter = vecFirstHalf.begin();
      vector<char>::const_iterator vecend = vecFirstHalf.end();
      cout << hex;
      while( iter != vecend ) {
         for (int i = 0; i < 8; i++) {
               cout << "0x"				//start hex notation
    	 << setw(2) << setfill('0') << right	//format number
    	<< (0x00FF & (short signed int)*iter++) //mask multibyte short to byte size
    	<< " ";					//give some space
           if( iter == vecend ) break;		//make sure we have more data
        }
        cout << endl;
      }
      cout << dec << endl;
      return EXIT_SUCCESS;
    }
    
    
     
  2. shabbir

    shabbir Administrator Staff Member

    Joined:
    Jul 12, 2004
    Messages:
    15,283
    Likes Received:
    364
    Trophy Points:
    83
    For such a huge amount of data, it's always better to do the processing in worker threads rather than in the main process.
     

Share This Page