Code:
/**********************************************************************
* Name : 24challange.cpp
* Author : Highspider
* Version : 9 - sentences length
* Description: Write a program that analyzes input from a file, essay.txt, and
* compiles statistics on it. The program should output:
* 1. The total word count
* 2. The count of unique words
* 3. The number of sentences
*
* Changes : Added display mode -l
* Removed -w mode (an odd debug mode, used because of trouble with
* step-into/step-out debugging)
* Got rid of all non-const global variables
* Made functions cleaner -- necessary without globals
* Fixed all memory leaks
* Added two more dynamic arrays for similar words and their counts
* Added a bubble sort to order the similar words
* Added display mode -s.
* Added display mode -p
* Use command-line arguments for cookie points.
* Added display modes -d and -n
*****************************************************************************/
//Declarations
#include <cstdio> //for snprintf
#include <cstring> //for strcmp strlen strcpy strcmp
#include <iostream> //for Std in out
#include <stdlib.h> //for atoi
#include <fstream> //for files
using std::cout; //iostream: std out
using std::endl; //iostream: end line + return
using std::ifstream; //fstream: open read only mode
//Constant globals
const int MAX = 256; //size in bytes of the character buffers used for reading and building text
//Function declarations (definitions follow main)
//GetWords: reads space-delimited tokens from wordfile, appends the cleaned
//words to `words`, appends the word index at each sentence terminator to
//`sent` (as decimal text) and returns the number of terminators counted.
int GetWords(char buffer[MAX], char **&words, ifstream &wordfile, int &num_words,
char **&sent, int&num_sent);
//SimWords: fills `simwords` with the words that occur more than once and
//`simcount` with their occurrence counts (as decimal text); returns the
//number of repeat occurrences found.
int SimWords(char ** words, int num_words, char **&simwords, int &num_simwords,
char **&simcount, int &num_simcount);
//Phrases: appends to `phrases` each three-word sequence that is found again
//later in `words`; `buffer` is used as scratch space.
void Phrases(char buffer[MAX], char ** words, char **&phrases, int &num_phrases,
int num_words);
//avgslen: prints every sentence with its word count, then the average count.
void avgslen(char ** sent, int num_sent, char ** words, int num_words);
//DisplayWords: prints the list, numbered one entry per line when mode is
//true, or run together on one line when mode is false.
void DisplayWords(char ** words, int num_words, bool mode);
//DisplaySimWords: prints each repeated word next to its count.
void DisplaySimWords(char ** simwords,char ** simwordscount, int num_simwords);
//sort: bubble sorts both parallel arrays into descending count order.
void sort(char **&simwordscount, char **&simwords, int num_simwordscount );
//DynamicAllocate: grows `words` by one entry holding a copy of `buffer`;
//`temp` receives the address of the newly allocated array.
void DynamicAllocate(char buffer[MAX], char **&temp, char **&words, int & num_words);
//Deallocate: frees each string in `words`; the array itself is not freed.
void Deallocate(char **&words, int num_words);
/***********************************************************************
*NAME: main()
*PURPOSE: Analyze the text file named on the command line and print the
*       : total word count, unique word count and sentence count,
*       : preceded by the listing selected by the optional display mode
*PARAM: argc, argv -- argv[1] is the file name, argv[2] (optional) is
*     : one of -d -n -s -p -l; any other option is ignored
*RETURN: EXIT_SUCCESS after a completed analysis, EXIT_FAILURE when the
*      : arguments are wrong or the file cannot be opened
***********************************************************************/
int main (int argc, char* argv[])
{
    //a file name is required and at most one display option may follow it
    if ( argc < 2 || argc > 3 ){
        //argv[0] can be missing, never stream a null pointer
        const char * program = ( argc > 0 && argv[0] != 0 ) ? argv[0] : "analyze";
        cout << "\n\nUsage: " << program << " <filename> [ arg ]\n"
             << "\t\t -d: (d)isplay words of the file\n"
             << "\t\t -n: display words (n)ot formated\n"
             << "\t\t -s: display (s)imilar words\n"
             << "\t\t -p: display similar (p)hrases\n"
             << "\t\t -l: display average (l)ength of sentences\n"
             << endl;
        return EXIT_FAILURE;
    }

    //open the word list named by argv[1]
    ifstream wordfile;
    wordfile.open ( argv[1] );
    if ( !wordfile.is_open() ){
        cout << "\n!Error!\n!Error!\t" << argv[1]
             << "\n!Error!\tFile not found or unable to open\n!"
             << "Error!\n" << endl;
        return EXIT_FAILURE;
    }

    char buffer[MAX];       //scratch buffer shared with GetWords and Phrases
    //dynamic arrays of C strings and the number of entries in each
    int num_words = 0;      //words read from the file
    char ** words = 0;
    int num_sent = 0;       //word index (as text) at each sentence end
    char ** sent = 0;
    int num_simwords = 0;   //words that occur more than once
    char ** simwords = 0;
    int num_simcount = 0;   //occurrence counts matching simwords
    char ** simcount = 0;
    int num_phrases = 0;    //repeated three-word phrases (-p only)
    char ** phrases = 0;

    //read the words and count the sentences, then release the file
    const int sentence = GetWords(buffer, words, wordfile, num_words,
                                  sent, num_sent);
    wordfile.close();
    //collect the repeated words and count the repeat occurrences
    const int similar = SimWords(words, num_words, simwords, num_simwords,
                                 simcount, num_simcount);

    //optional listing selected by argv[2]
    if ( argc == 3 ){
        const char * option = argv[2];
        if ( !strcmp(option, "-d") )
            DisplayWords(words, num_words, true);
        else if ( !strcmp(option, "-n") )
            DisplayWords(words, num_words, false);
        else if ( !strcmp(option, "-s") ){
            //most frequent first
            sort(simcount, simwords, num_simcount);
            DisplaySimWords(simwords, simcount, num_simwords);
        }
        else if ( !strcmp(option, "-p") ){
            Phrases(buffer, words, phrases, num_phrases, num_words);
            DisplayWords(phrases, num_phrases, true);
            //the phrase list is only needed for this listing
            Deallocate(phrases, num_phrases);
            delete [] phrases;
        }
        else if ( !strcmp(option, "-l") )
            avgslen(sent, num_sent, words, num_words);
    }

    //summary shown in every mode
    cout << "analyze\n"
         << "Total word count: "<< num_words << endl
         << "Unique words: " << num_words - similar << endl
         << "Sentences: " << sentence << endl;

    //Deallocate frees the strings, delete [] frees the pointer array
    Deallocate(words, num_words);
    delete [] words;
    Deallocate(simwords, num_simwords);
    delete [] simwords;
    Deallocate(simcount, num_simcount);
    delete [] simcount;
    Deallocate(sent, num_sent);
    delete [] sent;
    return EXIT_SUCCESS;
}//end main
/***********************************************************************
*NAME: GetWords()
*PURPOSE: Read space-delimited tokens from a text file and build the
*       : list of lower-cased words plus the list of sentence ends
*PARAM: buffer    -- caller supplied scratch buffer of MAX chars
*     : words, num_words -- word list and its length, by ref (appended)
*     : wordfile  -- open input stream, by ref
*     : sent, num_sent   -- list of word indexes (as decimal text) at
*     :              which a sentence ends, by ref (appended)
*RETURN: int number of sentences found by this call
*NOTES: - newline, carriage return and tab inside a token also end a word
*     : - a comma or a sentence terminator (. ! ?) ends the text of the
*     :   word; anything after it up to the next word break is dropped
*     : - double quotes are dropped wherever they appear
*     : - empty strings are never stored as words
*     : - a run of terminators ("...") closes one sentence, not several
*     : - a token longer than MAX - 1 chars is read in MAX - 1 char pieces
***********************************************************************/
int GetWords(char buffer[MAX], char **&words, ifstream &wordfile,
             int &num_words, char **&sent, int&num_sent)
{
    char ** temp = 0;       //scratch pointer required by DynamicAllocate
    char word[MAX];         //cleaned text of the word being assembled
    char index_text[MAX];   //decimal text of a sentence-ending word index
    int sentences = 0;      //return value
    //index of the word that closed the most recent sentence, -1 if none
    int last_end = ( num_sent > 0 ) ? atoi(sent[num_sent - 1]) : -1;
    bool more = true;       //false once the stream has nothing left

    while ( more ){
        buffer[0] = '\0';
        wordfile.getline(buffer, MAX, ' ');
        if ( wordfile.eof() || wordfile.bad() )
            more = false;       //process what was read, then stop
        else if ( wordfile.fail() )
            //the token did not fit: getline set failbit and would refuse
            //every later read, so reset it and carry on with the rest
            wordfile.clear();

        int len = 0;            //characters collected in word[]
        bool cut = false;       //true after a comma or terminator
        for ( const char * p = buffer; ; ++p ){
            char c = *p;
            const bool at_end = ( c == '\0' );
            if ( at_end || c == '\n' || c == '\r' || c == '\t' ){
                //word break: store the word if there is one
                if ( len > 0 ){
                    word[len] = '\0';
                    DynamicAllocate(word, temp, words, num_words);
                }
                len = 0;
                cut = false;
                if ( at_end )
                    break;
            }
            else if ( c == '.' || c == '!' || c == '?' ){
                //the sentence closes on the word being built, or on the
                //previous word when nothing has been collected yet
                const int end_index = ( len > 0 ) ? num_words : num_words - 1;
                //only record it when a word was added since the last end
                if ( end_index > last_end ){
                    std::snprintf(index_text, sizeof index_text, "%d", end_index);
                    DynamicAllocate(index_text, temp, sent, num_sent);
                    last_end = end_index;
                    sentences++;
                }
                cut = true;
            }
            else if ( cut || c == '"' ){
                //text after a cut and quote marks are not part of the word
            }
            else if ( c == ',' ){
                cut = true;
            }
            else if ( len < MAX - 1 ){
                //upper case to lower case, 'Z' included
                if ( c >= 'A' && c <= 'Z' )
                    c = static_cast<char>(c - 'A' + 'a');
                word[len++] = c;
            }
        }//end for each char of the token
    }//end while more input
    return sentences;
}
/***********************************************************************
*NAME: SimWords()
*PURPOSE: Find the words that occur more than once and count them
*PARAM: words, num_words -- word list to examine, by value
*     : simwords, num_simwords -- list of repeated words, by ref
*     : simcount, num_simcount -- occurrence count of each repeated word
*     :              as decimal text, parallel to simwords, by ref
*RETURN: int number of repeat occurrences found (total words minus
*      : unique words)
*NOTES: A count string is replaced by a freshly sized one on every
*     : update, so a count of any number of digits fits its storage.
***********************************************************************/
int SimWords(char ** words, int num_words, char **&simwords, int &num_simwords,
             char **&simcount, int &num_simcount){
    char ** temp = 0;           //scratch pointer required by DynamicAllocate
    int count = 0;              //repeat occurrences found
    char first_count[] = "2";   //count stored when a word is first repeated
    char text[32];              //decimal text of an updated count

    for ( int m = 0; m < num_words; m++ ){
        //look for the next occurrence of words[m] further down the list
        int next = m + 1;
        while ( next < num_words && strcmp(words[m], words[next]) != 0 )
            next++;
        if ( next == num_words )
            continue;           //words[m] does not occur again
        count++;

        //is the word already in the repeated-word list?
        int slot = 0;
        while ( slot < num_simwords && strcmp(words[m], simwords[slot]) != 0 )
            slot++;

        if ( slot < num_simwords ){
            //known word: store count + 1 in a string of the right size
            std::snprintf(text, sizeof text, "%d", atoi(simcount[slot]) + 1);
            char * grown = new char[strlen(text) + 1];
            strcpy(grown, text);
            delete [] simcount[slot];
            simcount[slot] = grown;
        }
        else{
            //first repeat of this word: two occurrences seen so far
            DynamicAllocate(words[m], temp, simwords, num_simwords);
            DynamicAllocate(first_count, temp, simcount, num_simcount);
        }
    }//end for() m
    return count;
}//end func
/************************************************************************
*NAME Phrases()
*PURPOSE: Find three-word phrases that occur more than once
*PARAM: buffer -- caller supplied scratch buffer of MAX chars; holds the
*     :           text of the last repeated phrase on return
*     : words, num_words -- word list to examine, by value
*     : phrases, num_phrases -- list of repeated phrases, by ref; each
*     :           phrase is appended once as "word word word"
*RETURN: NONE
*NOTES: - the later window starts at i + 2 as before, so it may share
*     :   one word with the window at i
*     : - phrase text longer than MAX - 1 chars is truncated to fit
*************************************************************************/
void Phrases(char buffer[MAX], char **words, char **&phrases,
             int &num_phrases, int num_words){
    char ** temp = 0;   //scratch pointer required by DynamicAllocate
    cout << "phrases" << endl;
    for ( int i = 0; i < num_words - 2; ++i ){
        //is the window starting at i repeated by a later window?
        //the words are compared one by one, no joined copy is needed
        bool repeated = false;
        for ( int l = i + 2; l < num_words - 2 && !repeated; ++l ){
            repeated = !strcmp(words[i], words[l])
                    && !strcmp(words[i + 1], words[l + 1])
                    && !strcmp(words[i + 2], words[l + 2]);
        }
        if ( !repeated )
            continue;
        //join the three words with spaces, never writing past MAX chars
        std::snprintf(buffer, MAX, "%s %s %s",
                      words[i], words[i + 1], words[i + 2]);
        //a phrase already in the list is not added again
        bool known = false;
        for ( int f = 0; f < num_phrases && !known; ++f )
            known = !strcmp(buffer, phrases[f]);
        if ( !known )
            DynamicAllocate(buffer, temp, phrases, num_phrases);
    }//end i loop
}//end func
/***********************************************************************
*NAME: avgslen(); average sentence length
*PURPOSE: List every sentence with its word count, then print the
*       : average number of words per sentence
*PARAM: sent, num_sent -- index (as decimal text) of the last word of
*     :                   each sentence, by value
*     : words -- the word list the indexes refer to, by value
*     : num_words -- not used
*RETURN: none; nothing is printed when there are no sentences
***********************************************************************/
void avgslen(char ** sent, int num_sent, char ** words, int num_words){
    if ( num_sent <= 0 )
        return;
    float total = 0;        //running sum of the lengths, then the average
    int previous_end = -1;  //last word of the sentence before this one
    //blank lines ahead of the first entry
    cout << "\n\n";
    for ( int s = 0; s < num_sent; ++s ){
        const int this_end = atoi(sent[s]);
        const int length = this_end - previous_end;
        //entry number and word count [x]
        cout << s + 1 << ") [" << length << ']';
        //the words that make up the sentence
        for ( int w = previous_end + 1; w <= this_end; ++w )
            cout << ' ' << words[w];
        cout << endl;
        total = total + length;
        previous_end = this_end;
    }
    total = total / num_sent;
    cout << "\nThe average sentence length is: ["
         << total << "] words long\n\n";
}
/***********************************************************************
*NAME: DisplayWords()
*PURPOSE: Print a list of strings to standard output
*PARAM: words, num_words -- the list and its length, by value
*     : mode -- true: one numbered, bracketed entry per line
*     :         false: entries run together with nothing between them
*RETURN: NONE
***********************************************************************/
void DisplayWords(char ** words, int num_words, bool mode){
    int shown = 0;  //entries printed so far, also the number of the next one
    while ( shown < num_words ){
        char * entry = words[shown];
        ++shown;
        if ( mode )
            //formatted: tab, number, [entry]
            cout << '\t' << shown << '[' << entry << ']' << endl;
        else
            //not formatted
            cout << entry;
    }
    //closing line break in both modes
    cout << endl;
}
/***********************************************************************
*NAME: DisplaySimWords()
*PURPOSE: Print each repeated word together with its count
*PARAM: simwords -- the repeated words, by value
*     : simwordscount -- the count text for each word, same order
*     : num_simwords -- number of entries to print
*RETURN: NONE
***********************************************************************/
void DisplaySimWords(char ** simwords, char ** simwordscount, int num_simwords){
    //heading
    cout << "\n\n\t\tDescending frequency most used words:\n\n";
    //one line per word: position, [word], count
    for ( int rank = 1; rank <= num_simwords; ++rank ){
        char * word = simwords[rank - 1];
        char * tally = simwordscount[rank - 1];
        cout << rank << " [" << word << "] * " << tally << endl;
    }
}
/************************************************************************
*NAME sort(char **&simwordscount, char **&simwords, int num_simwordscount )
*PURPOSE: Put the repeated words in order of descending count
*PARAM: simwordscount -- count text for each word, by ref
*     : simwords -- the words, kept in step with the counts, by ref
*     : num_simwordscount -- number of entries, by value
*NOTE: Bubble sort with a fixed number of passes; entries with equal
*    : counts keep their relative order.
*RETURN: NONE
*************************************************************************/
void sort(char **&simwordscount, char **&simwords, int num_simwordscount )
{
    const int last = num_simwordscount - 1; //index of the final entry
    int pass = 0;
    //one pass per entry, no early-exit flag
    while ( pass < num_simwordscount ){
        for ( int k = 0; k < last; ++k ){
            const int left = atoi(simwordscount[k]);
            const int right = atoi(simwordscount[k + 1]);
            //neighbours already in descending order stay where they are
            if ( left >= right )
                continue;
            //exchange the two counts
            char * held = simwordscount[k];
            simwordscount[k] = simwordscount[k + 1];
            simwordscount[k + 1] = held;
            //and the two matching words so the arrays stay parallel
            held = simwords[k];
            simwords[k] = simwords[k + 1];
            simwords[k + 1] = held;
        }
        ++pass;
    }
}
/***********************************************************************
*NAME: DynamicAllocate()
*PURPOSE: Append a copy of a string to a dynamically sized list
*PARAM: buffer -- the text to copy
*     : temp -- by ref, receives the address of the new pointer array
*     :         (the same address `words` holds on return)
*     : words, num_words -- the list and its length, by ref; the old
*     :         pointer array is deleted and the length grows by one
*RETURN: NONE
***********************************************************************/
void DynamicAllocate(char buffer[MAX], char **&temp, char **&words,
                     int &num_words){
    //pointer array with room for every existing entry plus the new one
    temp = new char * [num_words + 1];
    //carry the existing string pointers across, the strings do not move
    //(nothing to carry on the first call, when num_words is 0)
    for ( int slot = num_words - 1; slot >= 0; --slot )
        temp[slot] = words[slot];
    //the new entry gets its own copy of the text, terminator included
    char * copy = new char[strlen( buffer ) + 1];
    temp[num_words] = copy;
    strcpy( copy, buffer );
    //the old pointer array is no longer needed
    delete [] words;
    //the list now lives in the new array
    words = temp;
    ++num_words;
}
/***********************************************************************
*NAME: Deallocate()
*PURPOSE: Free every string held in a dynamically built list
*PARAM: words -- the list, by ref; may be null, in which case nothing
*     :          is done
*     : num_words -- number of entries in the list, by value
*RETURN: NONE
*NOTES: The pointer array itself is NOT freed, the caller still has to
*     : delete [] it. Every freed slot is set to null so a repeated call
*     : on the same list is harmless.
***********************************************************************/
void Deallocate(char **&words, int num_words){
    //nothing was ever allocated
    if ( words == 0 )
        return;
    for ( int i = 0; i < num_words; ++i ){
        delete [] words[i];
        //do not leave a dangling pointer behind
        words[i] = 0;
    }
}
Bookmarks