You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
515 B
31 lines
515 B
#!/usr/bin/env python3
|
|
|
|
import json
|
|
|
|
# Sentinel states used by the chain: START is the state the first word of
# each sentence is recorded under, END is recorded after a word that
# finishes a sentence.
START = "__START"
END = "__END"

# Load the training text and break it into whitespace-separated tokens.
# NOTE(review): the file is decoded with the platform default encoding --
# confirm that matches how corpus.txt is stored.
with open('corpus.txt') as f:
    text = f.read()
corpus = text.split()

# Transition table: maps a state to the list of states observed after it.
markov = dict()
|
|
|
|
def markov_add(cur, nxt):
    """Record that state *nxt* was observed immediately after state *cur*.

    The transition is appended to the module-level ``markov`` table,
    creating the successor list for *cur* on first use. Repeated
    transitions are kept as duplicates, so the list preserves how often
    each successor was seen.
    """
    markov.setdefault(cur, []).append(nxt)
|
|
|
|
# Walk the corpus once, linking each word to the word that follows it.
# `cur` is the state the next word is recorded under; it resets to START
# whenever a sentence finishes.
cur = START
for word in corpus:
    if not word:
        continue

    markov_add(cur, word)

    ends_sentence = word[-1] in '.?!'
    if ends_sentence:
        # A word carrying terminal punctuation also gets a transition to END.
        markov_add(word, END)
    cur = START if ends_sentence else word

# NOTE(review): if the corpus does not finish with '.', '?' or '!', the last
# word receives no END transition -- confirm the consumer tolerates that.

# Persist the transition table as JSON.
with open('markov.json', 'w') as f:
    json.dump(markov, f)
|