#include <ctype.h>

#include <cctype>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <vector>
#define MAXLINE 100000 
/*----------------------------------------------------------------------
Usage: indexCorpus inFile outFile vocabFile [flag]

Go over inFile, compile a vocabulary of distinct words, and save
it to vocabFile, one word per line. Use the vocabulary to create outFile,
in which each word is replaced by its integer (0-based) index into the
vocabulary. Tokens that begin with a digit are skipped. If the optional
flag parameter is "nocase", casing information is ignored.

Guy Lebanon, 5/17/2005.
------------------------------------------------------------------------*/
using namespace std;
/* Convert the NUL-terminated C string s to lower case, in place.
 *
 * s may be null, in which case nothing happens. Conversion follows
 * std::tolower in the current C locale, so characters without a lower-case
 * mapping (digits, punctuation, bytes >= 0x80 in the "C" locale) are left
 * unchanged.
 */
void lowerCase(char* s)
{
  if (!s) return;
  /* Walk to the terminator directly instead of calling strlen() on every
     iteration, which made the loop quadratic in the string length. */
  for (char* p = s; *p != '\0'; ++p) {
    /* tolower() requires a value representable as unsigned char (or EOF);
       passing a negative plain char is undefined behaviour. */
    *p = static_cast<char>(std::tolower(static_cast<unsigned char>(*p)));
  }
}
/* Replace every whitespace-separated token read from `in` by its index in a
 * vocabulary built on the fly, writing the result to `out`.
 *
 * - Each '\n' in the input produces a '\n' in the output, so the line
 *   structure is preserved.
 * - Every kept token is written as a single space followed by its 0-based
 *   index.
 * - Tokens whose first character is a digit are dropped.
 * - When ignoreCase is true, tokens are lower-cased (via lowerCase) before
 *   the vocabulary lookup.
 *
 * Returns the distinct tokens in first-seen order; the position of a token
 * in the returned vector is the index written to `out`.
 */
static std::vector<std::string> indexStream(std::istream& in, std::ostream& out,
                                            bool ignoreCase)
{
  typedef std::vector<std::string>::size_type Index;

  std::vector<std::string> vocab;        /* index -> word, in first-seen order */
  std::map<std::string, Index> indexOf;  /* word -> index, avoids a linear scan per token */
  std::string word;                      /* grows as needed: no fixed-size token buffer */
  char c;

  while (in.get(c)) {
    if (c == '\n') { out << '\n'; continue; }

    /* Treat every other whitespace character (space, tab, CR, ...) as a plain
       separator. Handing it back to operator>> would make the extraction skip
       across any newlines that follow it and lose those line breaks. */
    if (std::isspace(static_cast<unsigned char>(c))) continue;

    in.putback(c);
    if (!(in >> word)) break;  /* defensive: never index a token that was not read */

    /* isdigit() needs an unsigned char value; word is non-empty here. */
    if (std::isdigit(static_cast<unsigned char>(word[0]))) continue;

    /* word is non-empty, so &word[0] points at its NUL-terminated buffer. */
    if (ignoreCase) lowerCase(&word[0]);

    /* insert() leaves an existing entry untouched and tells us whether the
       word was new, so a single lookup serves both cases. */
    const std::pair<std::map<std::string, Index>::iterator, bool> slot =
        indexOf.insert(std::make_pair(word, vocab.size()));
    if (slot.second) vocab.push_back(word);

    out << ' ' << slot.first->second;
  }
  return vocab;
}

/* Entry point: indexCorpus inFile outFile vocabFile [flag]
 *
 * Reads inFile, writes the indexed corpus to outFile and the vocabulary (one
 * word per line, in index order) to vocabFile. Case is ignored only when
 * exactly one extra argument is given and it equals "nocase".
 *
 * Returns 0 on success and 1 when arguments are missing or a file cannot be
 * opened.
 */
int main(int argc,char **argv)
{
  /* Validate the argument count before touching argv[1..3]: with too few
     arguments those entries are not valid file names. */
  if (argc<4) {std::cerr<<"Not enough arguments"<<std::endl;return 1;}

  const bool ignoreCase = (argc==5 && std::string(argv[4])=="nocase");

  std::ifstream fr(argv[1]);
  std::ofstream fw(argv[2]), fv(argv[3]);
  if (!fr || !fw || !fv) {std::cerr<<"Error opening Files"<<std::endl;return 1;}

  const std::vector<std::string> vocab = indexStream(fr, fw, ignoreCase);

  /* '\n' rather than endl: same bytes, without a flush per line. */
  for (std::vector<std::string>::size_type i=0;i<vocab.size();++i)
    fv<<vocab[i]<<'\n';
  return 0;
}
