EveryGrams

import dependencies:

import nltk
from collections import Counter
from nltk.lm.preprocessing import flatten
from nltk.util import everygrams
import pandas as pd

prep dataset:

# Toy corpus: one short sentence per row plus two demographic columns.
# Built column-by-column; the explicit `columns=` keeps the column order fixed.
df = pd.DataFrame(
    {
        "text": [
            "i go to school every day",
            "he goes to school every day",
            "they go to school every day",
        ],
        "Age": [22, 35, 58],
        "Sex": ["male", "male", "female"],
    },
    columns=["text", "Age", "Sex"],
)
df

dataset output:

add the gramx column (all 1- to 3-grams of each text):

# Split each sentence on whitespace, then collect every n-gram of length
# 1 through 3 for that sentence into a list stored in the new 'gramx' column.
tokenized_texts = (sentence.split() for sentence in df['text'])
df['gramx'] = [
    list(everygrams(tokens, min_len=1, max_len=3))
    for tokens in tokenized_texts
]

df

dataframe output:

create count_gramx:

# Count how often each n-gram occurs across all rows.
# `flatten` chains the per-row n-gram lists into a single stream, and Counter
# consumes any iterable, so no intermediate list copies are needed.
count_gramx = Counter(flatten(df['gramx']))
count_gramx

count_gramx output:

count_gramx output: