EveryGrams
import dependencies:
import nltk
from collections import Counter
from nltk.lm.preprocessing import flatten
from nltk.util import everygrams
import pandas as pd
prep dataset:
# Toy corpus: one sentence per row, plus two demographic columns.
# Built column-wise; the dict's key order fixes the column order.
df = pd.DataFrame(
    {
        "text": [
            "i go to school every day",
            "he goes to school every day",
            "they go to school every day",
        ],
        "Age": [22, 35, 58],
        "Sex": ["male", "male", "female"],
    }
)
df
dataset output:
add a 'gramx' column holding every 1- to 3-gram of each sentence:
# For every sentence, split on whitespace and collect all n-grams of
# length 1 through 3 as a list of tuples; store one list per row.
df['gramx'] = df['text'].apply(
    lambda sentence: list(everygrams(sentence.split(), min_len=1, max_len=3))
)
df
dataframe output:
create count_gramx:
# Count how often each n-gram occurs across all rows.
# ``flatten`` accepts any iterable of iterables and ``Counter`` consumes any
# iterable, so the per-row lists are chained and tallied directly, without
# first copying the column into a list and then materialising the flattened
# result into a second list.
count_gramx = Counter(flatten(df['gramx']))
count_gramx
count_gramx output: