Add Markov chain
This commit is contained in:
commit
9c8bf81f6f
55
markov.py
Executable file
55
markov.py
Executable file
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env python3
|
||||
from sys import argv
|
||||
from string import ascii_letters
|
||||
from random import choice
|
||||
|
||||
|
||||
def get_word_list(text):
    """Split *text* into a list of lowercase words.

    ASCII letters, the accented vowels listed below, the apostrophe and the
    full stop are kept (letters are lowercased); every other character is
    turned into a space. The result is then split on whitespace, so a full
    stop stays attached to the word it follows (e.g. ``"fine."``).
    """
    # Characters that may appear inside a word.
    allowed_letters = ascii_letters + "áàéèíìóòúù" + "ÁÀÉÈÍÌÓÒÚÙ" + "'."

    cleaned = []
    for char in text:
        if char in allowed_letters:
            cleaned.append(char.lower())
        else:
            # Anything not allowed acts as a word separator.
            cleaned.append(" ")

    # str.split() with no argument collapses runs of whitespace.
    return "".join(cleaned).split()
|
||||
|
||||
|
||||
def get_associations(word_list):
    """Map each word to the list of words that directly follow it.

    Followers are recorded in order of appearance and duplicates are kept,
    so a more frequent follower appears more often in the list.

    Example: ``['a', 'b', 'a', 'c']`` becomes
    ``{'a': ['b', 'c'], 'b': ['a']}``. The last word gets an entry only if
    it also occurs earlier in the list.
    """
    followers = {}
    # Walk over consecutive (word, next word) pairs.
    for current, following in zip(word_list, word_list[1:]):
        followers.setdefault(current, []).append(following)
    return followers
|
||||
|
||||
|
||||
def generate(text, max_words=100):
    """Generate a random sentence from *text* using a word-level Markov chain.

    The text is tokenized with ``get_word_list`` and the follower table is
    built with ``get_associations``. Starting from a random word, each next
    word is picked at random among the followers of the current one, until
    a word ending with a full stop is emitted or *max_words* words have
    been produced.

    Args:
        text: Source text the chain is built from.
        max_words: Upper bound on the number of words in the output.

    Returns:
        The generated sentence with its first character uppercased, or an
        empty string when *text* contains no words or *max_words* is not
        positive.
    """
    word_list = get_word_list(text)
    # Without words (or without a word budget) there is nothing to build;
    # bail out instead of failing on choice([]) or on indexing "" below.
    if not word_list or max_words <= 0:
        return ""

    associations = get_associations(word_list)
    out_list = []
    current_word = choice(word_list)

    for _ in range(max_words):
        out_list.append(current_word)
        # A word ending with a full stop closes the sentence.
        if current_word.endswith("."):
            break
        # A word with no recorded follower falls back to the whole word
        # list so the chain can continue.
        current_word = choice(associations.get(current_word, word_list))

    # Re-attach full stops that were tokenized as standalone words.
    out_string = " ".join(out_list).replace(" .", ".")
    # out_string is non-empty here: at least one non-empty word was added.
    return out_string[0].upper() + out_string[1:]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: the first argument is the path of the text
    # file used as the source for the generated sentence.
    if len(argv) < 2:
        print('Usage:', argv[0], 'something_antani.txt')
    else:
        with open(argv[1]) as source_file:
            source_text = source_file.read()
        print(generate(source_text))
|
Loading…
Reference in New Issue
Block a user