#!/usr/bin/env bash
#
# Cluster a corpus of grant proposals with Apache Mahout k-means:
#   text files -> sequence file -> tf-idf vectors -> k-means -> cluster dump
#
# Environment overrides (defaults are the values that used to be hard-coded):
#   MAHOUT_CMD  Mahout launcher to invoke        (default: bin/mahout)
#   GRANTS_DIR  directory holding the proposals  (default: /user/hadoop/grants)
# Every derived output is written next to GRANTS_DIR with a suffix appended.

# Stop at the first failing stage: each stage consumes the output of the one
# before it, so carrying on after a failure would only yield follow-on errors
# or results computed from stale data.
set -euo pipefail

readonly mahout="${MAHOUT_CMD:-bin/mahout}"

grants_dir="${GRANTS_DIR:-/user/hadoop/grants}"
# Drop a trailing slash so the suffixes below attach to the directory name.
readonly grants_dir="${grants_dir%/}"

readonly seq_dir="${grants_dir}-seqdir"
readonly sparse_dir="${seq_dir}-sparse"
readonly seed_dir="${grants_dir}-kmeans-clusters"
readonly kmeans_dir="${grants_dir}-kmeans"
readonly distance_measure=org.apache.mahout.common.distance.CosineDistanceMeasure

# Print the contents of one sequence file for manual inspection.
# Arguments: $1 - path of the sequence file to dump
# The dump feeds no later stage, so a failure is reported on stderr and the
# pipeline carries on.
dump_seqfile() {
  "${mahout}" seqdumper -i "$1" \
    || printf 'warning: seqdumper failed for %s\n' "$1" >&2
}

# Convert the grant proposals into a sequence file, combining all the input
# files into a single file.
"${mahout}" seqdirectory \
  -i "${grants_dir}" \
  -o "${seq_dir}" \
  -c UTF-8 \
  -chunk 5

# Use seqdumper to view the sequence file and observe that the input files
# were combined.
dump_seqfile "${seq_dir}/part-m-00000"

# Generate the corpus statistics (tf, df and tf-idf), assign an id to each
# term and build the dictionary. All vectors are stored in a sparse format.
"${mahout}" seq2sparse \
  -i "${seq_dir}/" \
  -o "${sparse_dir}" \
  --maxDFPercent 85 \
  --namedVector

# Examine the tf-idf vectors.
dump_seqfile "${sparse_dir}/tfidf-vectors/part-r-00000"

# Examine the dictionary file (term id to term mappings).
dump_seqfile "${sparse_dir}/dictionary.file-0"

# Run k-means for 3 clusters, using cosine distance over the tf-idf vectors.
"${mahout}" kmeans \
  -i "${sparse_dir}/tfidf-vectors/" \
  -c "${seed_dir}" \
  -o "${kmeans_dir}" \
  -dm "${distance_measure}" \
  -x 10 \
  -k 3 \
  -ow \
  --clustering

# Use clusterdump to get metrics about the clusters.
# The input pattern is quoted so it reaches Mahout literally instead of being
# expanded (or rejected) by the local shell; the '*' is meant to be resolved
# against the job output directory, not the local filesystem.
"${mahout}" clusterdump \
  -i "${kmeans_dir}/clusters-*-final" \
  -o clusterdump \
  -d "${sparse_dir}/dictionary.file-0" \
  -dt sequencefile \
  -b 100 \
  -n 20 \
  --evaluate \
  -dm "${distance_measure}" \
  -sp 0 \
  --pointsDir "${kmeans_dir}/clusteredPoints"
 
 
