/* Bucket the input rows so that every distinct (file_name, word) pair forms one group. */
file_and_words_groups = GROUP lowercased_file_and_word BY (file_name, word);

/*
 * Collapse each bucket into a single (file_name, word, count) row.
 * The composite group key is flattened back into its two fields, and
 * count is the number of input rows that fell into the bucket.
 */
file_and_word_and_count = FOREACH file_and_words_groups GENERATE
    FLATTEN(group) AS (file_name:chararray, word:chararray),
    COUNT_STAR(lowercased_file_and_word) AS count;
  
 
  
 /* Now that we have the data massaged into the form (file_name, word, count), I will let you proceed with the rest of the exercise. */
  
  
 /* We can now group appropriately and compute different statistics. For example, the Pig commands below compute the number of terms per document. JOINs may be important to keep in mind when normalising the tf or idf scores. */
  
 group_file_and_word_and_count = group file_and_word_and_count by file_name;
  
  
 /* Add doc sizes to the term count tuple */
 unnormalized_term_counts = foreach group_file_and_word_and_count generate group as file_name, flatten(file_and_word_and_count.(word, count)) as (word, count), SUM(file_and_word_and_count.count) as doc_size;
  
  
 /* Generate the tf scores */
 term_frequencies = foreach unnormalized_term_counts generate file_name as file_name, word as term, ((double)count / (double)doc_size) as term_freq;
 
 
