You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

31 lines
515 B

#!/usr/bin/env python3
import json
# Sentinel states stored in the chain: START is the state every sentence
# begins from, END is recorded after a sentence-ending word.
START = "__START"
END = "__END"

# Read the whole corpus and break it into whitespace-separated tokens.
# The encoding is spelled out so the result does not depend on the
# platform's default locale.
with open('corpus.txt', encoding='utf-8') as f:
    corpus = f.read().split()

# Transition table: state -> list of states observed right after it.
markov = {}
def markov_add(cur, nxt, table=None):
    """Record that state *nxt* was observed immediately after state *cur*.

    Args:
        cur: The state being left (a word or a sentinel).
        nxt: The state that followed ``cur`` (a word or a sentinel).
        table: Mapping of state -> list of successors to update. When
            omitted, the module-level ``markov`` table is used.

    Successors are kept in a list, so a transition that is seen several
    times appears several times in that list.
    """
    # Compare against None explicitly: an empty dict passed by the caller
    # is a valid (falsy) table and must not fall back to the global one.
    if table is None:
        table = markov
    table.setdefault(cur, []).append(nxt)
# Walk the corpus one token at a time, linking each token to the state
# that precedes it. `cur` is that preceding state; it starts at START and
# is reset to START after every sentence-ending token.
cur = START
for word in corpus:
    # Defensive guard against empty tokens.
    if not word:
        continue
    markov_add(cur, word)
    if word.endswith(('.', '?', '!')):
        # Sentence boundary: mark the end and begin a new sentence.
        markov_add(word, END)
        cur = START
    else:
        cur = word
# NOTE(review): if the corpus does not end with '.', '?' or '!', the last
# token never receives an END transition - confirm the consumer of the
# table tolerates a state without that entry.
# Write the transition table to markov.json as JSON text.
with open('markov.json', 'w') as out:
    serialized = json.dumps(markov)
    out.write(serialized)