2. Spell Checker

# Load the NLTK "words" corpus once at module level; answer_nine and
# answer_ten read this list as their dictionary of candidate spellings.
from nltk.corpus import words
correct_spellings = words.words()

2.1. Jaccard Distance on Character Trigrams

def answer_nine(entries=['cormulent', 'incendenece', 'validrate']):
    """Recommend a dictionary word for each misspelled entry using trigrams.

    For every entry, the candidates are the words in the module-level
    ``correct_spellings`` list that start with the same letter as the entry.
    Each candidate is scored by the Jaccard distance between its set of
    character trigrams and the entry's set of character trigrams, and the
    candidate with the smallest distance is recommended.

    Args:
        entries: Sequence of misspelled words. It is only read, never
            mutated, so the shared list default is safe.

    Returns:
        A list with one recommended word per entry, in the same order as
        ``entries``. Ties on distance are broken by the lexicographically
        smallest word, because ``(distance, word)`` tuples are compared.

    Raises:
        ValueError: If no word in ``correct_spellings`` starts with the
            first letter of an entry.
    """
    def trigrams(word):
        # Character-level 3-grams of the word, as a set for Jaccard.
        return set(nltk.ngrams(word, n=3))

    recommendations = []
    for entry in entries:
        # Derive the letter from the entry itself rather than hard-coding
        # it, so the function works for any list of misspellings.
        first_letter = entry[0]
        # The entry's n-gram set does not change per candidate: build once.
        entry_grams = trigrams(entry)

        scored = [
            (nltk.jaccard_distance(entry_grams, trigrams(candidate)), candidate)
            for candidate in correct_spellings
            if candidate[0] == first_letter
        ]
        if not scored:
            raise ValueError(
                "no dictionary word starts with {!r}".format(first_letter)
            )

        # Only the closest match is needed, so min() replaces a full sort.
        recommendations.append(min(scored)[1])

    return recommendations

# Run the trigram-based checker on its default misspelled entries.
answer_nine()

2.2. Jaccard Distance on Character 4-grams

def answer_ten(entries=['cormulent', 'incendenece', 'validrate']):
    """Recommend a dictionary word for each misspelled entry using 4-grams.

    For every entry, the candidates are the words in the module-level
    ``correct_spellings`` list that start with the same letter as the entry.
    Each candidate is scored by the Jaccard distance between its set of
    character 4-grams and the entry's set of character 4-grams, and the
    candidate with the smallest distance is recommended.

    Args:
        entries: Sequence of misspelled words. It is only read, never
            mutated, so the shared list default is safe.

    Returns:
        A list with one recommended word per entry, in the same order as
        ``entries``. Ties on distance are broken by the lexicographically
        smallest word, because ``(distance, word)`` tuples are compared.

    Raises:
        ValueError: If no word in ``correct_spellings`` starts with the
            first letter of an entry.
    """
    def four_grams(word):
        # Character-level 4-grams of the word, as a set for Jaccard.
        return set(nltk.ngrams(word, n=4))

    recommendations = []
    for entry in entries:
        # Derive the letter from the entry itself rather than hard-coding
        # it, so the function works for any list of misspellings.
        first_letter = entry[0]
        # The entry's n-gram set does not change per candidate: build once.
        entry_grams = four_grams(entry)

        scored = [
            (nltk.jaccard_distance(entry_grams, four_grams(candidate)), candidate)
            for candidate in correct_spellings
            if candidate[0] == first_letter
        ]
        if not scored:
            raise ValueError(
                "no dictionary word starts with {!r}".format(first_letter)
            )

        # Only the closest match is needed, so min() replaces a full sort.
        recommendations.append(min(scored)[1])

    return recommendations

# Run the 4-gram-based checker on its default misspelled entries.
answer_ten()

2.3. Edit Distance

def answer_eleven(entries=['cormulent', 'incendenece', 'validrate']):
    """Recommend a dictionary word for each misspelled entry by edit distance.

    For every entry, the candidates are the words in the NLTK ``words``
    corpus that start with the same letter as the entry. Each candidate is
    scored with ``nltk.edit_distance`` against the entry, and the candidate
    with the smallest distance is recommended.

    Args:
        entries: Sequence of misspelled words. It is only read, never
            mutated, so the shared list default is safe.

    Returns:
        A list with one recommended word per entry, in the same order as
        ``entries``. Ties on distance are broken by the lexicographically
        smallest word, because ``(distance, word)`` tuples are compared.

    Raises:
        ValueError: If no corpus word starts with the first letter of an
            entry.
    """
    from nltk.corpus import words

    # Loaded locally so this function does not depend on module state.
    correct_spellings = words.words()

    recommendations = []
    for entry in entries:
        # Derive the letter from the entry itself rather than hard-coding
        # it, so the function works for any list of misspellings.
        first_letter = entry[0]

        scored = [
            (nltk.edit_distance(entry, candidate), candidate)
            for candidate in correct_spellings
            if candidate[0] == first_letter
        ]
        if not scored:
            raise ValueError(
                "no dictionary word starts with {!r}".format(first_letter)
            )

        # Only the closest match is needed, so min() replaces a full sort.
        recommendations.append(min(scored)[1])

    return recommendations

# Run the edit-distance checker defined in this section on its default
# entries (previously this cell re-ran answer_ten by mistake).
answer_eleven()