8
\$\begingroup\$

I tried implementing a radix tree using C language in order to get the auto complete suggestions of a dictionary. What are the improvements that I can do here? Also, is there any better way of doing this?

The text file I used has the following format

a at along bat cat 

Here's my implementation.

#include <stdio.h> #include <stdlib.h> #include <stdbool.h> #include <string.h> #include <time.h> #include <ctype.h> // define a maximum length for the string. #define MAX_LEN (200) // define the alphabet size for english. #define ALPHABET_SIZE (26) // linked String structure typedef struct LinkedString { // linked string node contains a char. char character; struct LinkedString *next; } LinkedString; // TrieNode structure typedef struct TrieNode { struct TrieNode *children[ALPHABET_SIZE]; // linked string to place the str inside the trie node. LinkedString *LinkedString; // boolean to check for leaf_nodes bool is_leaf; } TrieNode; /* function signatures */ // create a trie Node. TrieNode *createNode(); // appends to the string[initial: end] and return a linked string. LinkedString *appendLinkedString(char *str, int inital, int end); // break the linked string and create a node with the breaked string and returns intial. TrieNode *breakLinkedString(TrieNode *previousNode, TrieNode *node, LinkedString *breakPoint); // insert a str to the trie node. void insert(TrieNode *root, char *str); // returns the pointer if found the str else returns NULL TrieNode *searchNodes(TrieNode *root, char *str); // convert a string in to charactors int str_to_char(LinkedString *begin, char *str, int initial); // print suggestions for the prefix. void printSuggetions(TrieNode *suggestedNode, char str[], int Size); // create a linked string with a given char. LinkedString *createString(char Char); // node count to get the memory used. int node_count = 0; // link string count to get the memory. int link_string_node_count = 0; int main(){ char input[MAX_LEN]; char str[MAX_LEN]; char lowerCase[MAX_LEN]; // read the file TrieNode *root = createNode(); // file name of the wordlist char *fileName = "wordlist70000.txt"; // create a file pointer to read the file FILE *fptr = fopen(fileName, "r"); int j = 0; // elapsed time ro insert all entries ti the trie clock_t start_time = clock(); // read the file and insert words into the tire while (fgets(str, MAX_LEN - 1, fptr)) { j = 0; // check whether the character is in alphabet for (int i = 0; i < MAX_LEN; ++i){ if (((str[i] > 64) && (str[i] < 91)) || (str[i] > 96) && (str[i] < 123)) { // converts the characters into lowercase lowerCase[j] = tolower(str[i]); j++; } if (str[i] == '\n'|| str[i] == '\0') { break; } } // configure the terminating character lowerCase[j] = 0; // insert into the trie insert(root, lowerCase); } clock_t end = clock(); // end the timer double elapsedtime = (double)(end - start_time) / CLOCKS_PER_SEC; printf("\nElapsed time for insert: %f s\n", elapsedtime); while (true){ // prompt the user to inputs printf("Enter the prefix['0' for quit!] :\n"); // get the input scanf("%s", input); // copy the input to the str strcpy(str, input); //if the exit condition is met if (strcmp(str, "0") == 0){ printf("Quit! \n"); break; } j = 0; // check whether the character is in alphabet for (int i = 0; i < MAX_LEN; ++i){ if (((str[i] > 64) && (str[i] < 91)) || (str[i] > 96) && (str[i] < 123)) { // converts the characters into lowercase lowerCase[j] = tolower(str[i]); j++; } if (str[i] == '\n' || str[i] == '\0') { break; } } // configure the terminating character lowerCase[j] = 0; strcpy(str, lowerCase); // star the time to track the elapsed time clock_t start_time = clock(); // check for the prefix TrieNode *suggestedNode = searchNodes(root, str); if (!suggestedNode) { // if prefix not found printf("Prefix Not Found!\n\n"); continue; } printf("\n ***************** SUGGETIONS ****************** \n"); printSuggetions(suggestedNode, str, strlen(str)); // end the timer clock_t end = clock(); // calculate the elapse time double elapsedtime = (double)(end - start_time) / CLOCKS_PER_SEC; // print the elapsed time and memory used. printf("\nElapsed time : %f s\n", elapsedtime); int count1 = (int)sizeof(*(root)) * node_count; int count2 = (int)sizeof(LinkedString) * link_string_node_count; printf("Memory used : %d Bytes\n\n", count1 + count2); } return 0; } TrieNode *createNode() { node_count++; // allocate space for new trie node. TrieNode *newtrieNode = (TrieNode *)malloc(sizeof(TrieNode)); // initialize the linked string with NULL newtrieNode->LinkedString = NULL; // make it as not a leaf newtrieNode->is_leaf = false; int i; for (i = 0; i < ALPHABET_SIZE; ++i) { // make all the chiledrens null. newtrieNode->children[i] = NULL; } return newtrieNode; } LinkedString *createString(char Char) { link_string_node_count++; // allocate space for a new string dynamically. LinkedString *newString = (LinkedString *)malloc(sizeof(LinkedString)); // put the char into its character newString->character = Char; // make the next pointer null. newString->next = NULL; // return the new string. return newString; } LinkedString *appendLinkedString(char *str, int inital, int end) { int i; // create a pointers to linked strings. LinkedString *current = createString(str[inital]); LinkedString *newString = NULL; LinkedString *string = current; // go from initial position to end position for (i = inital + 1; i < end; ++i) { // crate a new string with char index i or str newString = createString(str[i]); // make the next pointer point to new string. current->next = newString; // go through the string current = current->next; } // make the last pointer at the end. current = NULL; return string; } TrieNode *breakLinkedString(TrieNode *previousNode, TrieNode *node, LinkedString *breakPoint) { // create a new trie node pointer. TrieNode *newNode = createNode(); // create a new string beginning next to the break point. LinkedString *newString = breakPoint->next; breakPoint->next = NULL; // convert char to index int index1 = (newString->character) &mdash; 'a'; newNode->LinkedString = node->LinkedString; node->LinkedString = newString; newNode->children[index1] = node; int index2 = (newNode->LinkedString->character) - 'a'; // pointer the new node to the relevent index of the parent node. previousNode->children[index2] = newNode; // return the newnode. return newNode; } void insert(TrieNode *root, char *str) { // get the length of the str int lastLetterIndex = strlen(str); int i = 0, charIndex; // create trie nodes and likedlist pointers to track // previous current and next pointer nodes. TrieNode *currentNode = root, *previousNode = NULL; TrieNode *newNode = NULL; LinkedString *currentLetter, *previousLetter = NULL; currentLetter = currentNode -> LinkedString; // go till the last leter of the string. while (i < lastLetterIndex) { charIndex = (str[i]) - 'a'; // if current letter is null if (currentLetter == NULL) { // if the trie is empty. if (currentNode->children[charIndex] == NULL) { // create a new node. newNode = createNode(); // append the string[i:lastLetterIndex] point to the new nodes linked newNode->LinkedString = appendLinkedString(str, i, lastLetterIndex); newNode->is_leaf = true; currentNode->children[charIndex] = newNode; break; } else { // if it is the first node. // make the previous node pointing to the current node. previousNode = currentNode; currentNode = currentNode->children[charIndex]; previousLetter = currentNode->LinkedString; currentLetter = currentNode->LinkedString->next; } } else { if (currentLetter->character != str[i]) { // make the current node pointing to the breakedLinkedString. currentNode = breakLinkedString(previousNode, currentNode, previousLetter); // create a new node. newNode = createNode(); // append the link string to the newNodes LinkedString. newNode->LinkedString = appendLinkedString(str, i, lastLetterIndex); // make new node a leaf node newNode->is_leaf = true; // put newNode as a child of a current node. currentNode->children[charIndex] = newNode; break; } else { previousLetter = currentLetter; currentLetter = currentLetter->next; } } i++; } } TrieNode *searchNodes(TrieNode *root, char *str) { // get the lastIndex of the str int lastIndex = strlen(str); int i = 0, charIndex; TrieNode *currentNode = root; LinkedString *currentLetter = currentNode->LinkedString; while (i < lastIndex) { // get the index from the char Index. charIndex = str[i] - 'a'; // if current letter is null. if (currentLetter == NULL) { // point the relevent index to the currentNode. currentNode = currentNode->children[charIndex]; // if the word is not found. Then the current node // and the currentLetter is NULL. if (!currentNode) return NULL; // make the currentLeter pointing to the linked strings next letter. currentLetter = currentNode->LinkedString->next; } else { // go to the next letter. currentLetter = currentLetter->next; } i++; } // go untilll the current letter is found null while (currentLetter != NULL) { // go till the end of the character. str[lastIndex] = currentLetter->character; currentLetter = currentLetter->next; lastIndex++; } // make the last index pointing to the terminating character str[lastIndex] = '\0'; return currentNode; } int str_to_char(LinkedString *begin, char *str, int initial) { int newSize = initial; LinkedString *currentLetter = begin; // go while current lettter is not null while (currentLetter != NULL) { // put the char by char in str str[newSize] = currentLetter->character; // goto the next letter currentLetter = currentLetter->next; newSize++; } // set the terminating character. str[newSize] = '\0'; return newSize - 1; } void printSuggetions(TrieNode *suggestedNode, char str[], int Size) { // create a poiter to the suggested node TrieNode *currentNode = suggestedNode; int i, j, newSize; if (currentNode->is_leaf){ // go till the word_size for (j = 0; j < Size; ++j){ // print the uppercased char printf("%c", toupper(str[j])); } // print the endline printf("\n"); } // go through each children node. for (i = 0; i < ALPHABET_SIZE; ++i) { // if each child is not null if (currentNode->children[i] != NULL) { // break down the string and get the characters newSize = str_to_char(currentNode->children[i]->LinkedString, str, Size); // print the suggestion. printSuggetions(currentNode->children[i], str, newSize + 1); } } } 
\$\endgroup\$

    1 Answer 1

    4
    \$\begingroup\$

    Parenthesized macros

    The parens around

    (200) 

    aren't strictly necessary. Most of the time that we see these, it's because you want to enforce order of operations at the calling site, but there are no operations here.

    Statics

    This is a one-file (one-translation-unit) program, so all of your methods except main should be marked static.

    Const

    It's important that fileName be marked const.

    Pre-declaration

    You're using C99 or later due to your placement of int j. That means that you can actually narrow its scope and move the declaration into the loop itself:

    // read the file and insert words into the tire while (fgets(str, MAX_LEN - 1, fptr)) { int j = 0; 

    Encoding and literals

    Numeric literals for characters is generally not a good idea (e.g. 64 for @). Just use the '@' character literal instead. Reasons include legibility, maintainability, and easier transition to other character sets than ASCII if you use alternate compiler flags.

    Also, rather than str[i] > '@', it would be much clearer to write str[i] >= 'A'.

    Else

     if (str[i] == '\n'|| str[i] == '\0') { 

    should be an else if since those conditions are entirely disjoint from the ones above. Or you can move the null-or-newline check to be the first condition, in which case the second one wouldn't benefit from an else due to the break.

    Factoring out functions

    The loop after

     // check whether the character is in alphabet 

    is copy-and-pasted. Move this into a function for reuse.

    Mdash instead of hyphen?

    There's an encoding quirk:

    int index1 = (newString->character) &mdash; 'a'; 

    You're probably going to want to replace &mdash with a plain hyphen.

    \$\endgroup\$
    1
    • \$\begingroup\$I think it would have been good if you had explained why fileName should be declared as const. Otherwise good review.\$\endgroup\$
      – pacmaninbw
      CommentedNov 1, 2020 at 0:48

    Start asking to get answers

    Find the answer to your question by asking.

    Ask question

    Explore related questions

    See similar questions with these tags.