[Python] Python Step by Step
✂️
LLM Tokeniser
Week 13, 2026
# Affixes are tried in list order, so a longer affix must come before any
# shorter one it contains (e.g. "inter" before "in").
_PREFIXES = (
    "inter", "under", "multi", "hyper", "trans", "over", "auto", "semi",
    "anti", "dis", "pre", "mis", "sub", "un", "re", "in", "ex", "co", "il",
    "im",
)
_SUFFIXES = (
    "less", "ness", "able", "ible", "ment", "hood", "ship", "ing", "ful",
    "ion", "ist", "ism", "ity", "ize", "ous", "age", "ed", "ly", "er", "or",
    "ic", "al",
)
# Characters that always form a token of their own.
_PUNCTUATION = frozenset("`~!@#$%^&*()-_+=[]{}\\|/\"':;><,.?")


def _split_punctuation(word: str) -> list[str]:
    """Split *word* around punctuation, keeping each mark as its own piece."""
    pieces: list[str] = []
    start = 0
    # Index the original string directly: indices taken from a lowercased
    # copy can drift, because lowercasing may change a string's length.
    for pos, char in enumerate(word):
        if char in _PUNCTUATION:
            if pos > start:
                pieces.append(word[start:pos])
            pieces.append(char)
            start = pos + 1
    if start < len(word):
        pieces.append(word[start:])
    return pieces


def _split_digit_runs(piece: str) -> list[str]:
    """Split *piece* into maximal runs of decimal digits and of other characters."""
    from itertools import groupby

    return ["".join(run) for _, run in groupby(piece, key=str.isdecimal)]


def _split_affixes(piece: str) -> list[str]:
    """Peel at most one known prefix and one known suffix off *piece*.

    Pieces of length one and purely numeric pieces are returned unchanged,
    as is a piece that is itself exactly a prefix. The suffix is looked for
    in what remains after the prefix (if any) has been removed. Matching is
    case-insensitive; the returned parts keep their original casing.
    """
    if len(piece) <= 1 or piece.isdecimal():
        return [piece]

    head: list[str] = []
    lowered = piece.lower()
    for prefix in _PREFIXES:
        if lowered == prefix:
            # The whole piece is a prefix: nothing to peel, and no suffix
            # search either.
            return [piece]
        if piece[:len(prefix)].lower() == prefix:
            head.append(piece[:len(prefix)])
            piece = piece[len(prefix):]
            break

    lowered = piece.lower()
    for suffix in _SUFFIXES:
        if lowered == suffix:
            # The remainder is exactly a suffix: keep it in one part.
            break
        if piece[-len(suffix):].lower() == suffix:
            return head + [piece[:-len(suffix)], piece[-len(suffix):]]
    return head + [piece]


def _tokenise(text: str) -> list[str]:
    """Return the tokens of *text* in order of appearance.

    The text is split on single spaces; each word is then split around
    punctuation, into digit / non-digit runs, and finally into
    prefix / stem / suffix parts. Empty parts are dropped.
    """
    tokens: list[str] = []
    for word in text.split(' '):
        for piece in _split_punctuation(word):
            for run in _split_digit_runs(piece):
                tokens.extend(part for part in _split_affixes(run) if part)
    return tokens


def main(text: str = "Example Text 123") -> None:
    """Tokenise *text* and print one token per line.

    Args:
        text: The text to tokenise. Defaults to the built-in sample sentence.
    """
    for token in _tokenise(text):
        print(token)
# Run the demo only when this file is executed as a script, so that importing
# the module does not print anything.
if __name__ == "__main__":
    main()