markov/markov.py

56 lines
1.7 KiB
Python
Executable File

#!/usr/bin/env python3
from sys import argv
from string import ascii_letters
from random import choice
def get_word_list(text):
    """Split *text* into a list of lowercase words.

    ASCII letters, the accented vowels listed below, apostrophes and full
    stops are kept (lowercased); every other character is treated as a
    word separator. A full stop therefore stays attached to the word it
    follows, or becomes a token of its own when surrounded by separators.
    """
    allowed_letters = ascii_letters + "áàéèíìóòúù" + "ÁÀÉÈÍÌÓÒÚÙ" + "'."

    def normalize(char):
        # Anything outside the allowed set collapses to a separator.
        if char in allowed_letters:
            return char.lower()
        return " "

    cleaned = "".join(map(normalize, text))
    # split() with no argument also discards runs of consecutive spaces.
    return cleaned.split()
def get_associations(word_list):
    """Map every word to the list of words that directly follow it.

    Followers are stored in order of appearance and duplicates are kept.
    The last word of the list gets a key only if it also occurs earlier.

    Example: ['a', 'b', 'a', 'c'] becomes {'a': ['b', 'c'], 'b': ['a']}.
    """
    followers = {}
    # Pair each word with its successor; the final word has none.
    for current, following in zip(word_list, word_list[1:]):
        followers.setdefault(current, []).append(following)
    return followers
def generate(text, max_words=100):
    """Build a random sentence from *text* with a first-order Markov chain.

    The text is tokenized with get_word_list() and each next word is drawn
    at random from the words that followed the current one in the text
    (see get_associations()). Generation stops after the first word ending
    with a full stop, or once *max_words* words have been emitted.

    Args:
        text: The source text to imitate.
        max_words: Upper bound on the number of words in the result.

    Returns:
        The generated sentence with its first character upper-cased, or an
        empty string when *text* contains no usable word or *max_words* is
        not positive.
    """
    word_list = get_word_list(text)
    # choice() raises IndexError on an empty sequence, and an empty result
    # has no first character to capitalize, so bail out early.
    if not word_list or max_words <= 0:
        return ""

    associations = get_associations(word_list)
    out_list = []
    current_word = choice(word_list)
    for _ in range(max_words):
        out_list.append(current_word)
        if current_word.endswith("."):
            break
        # A word with no recorded follower (only possible for the last
        # word of the text) restarts the chain from any word.
        current_word = choice(associations.get(current_word, word_list))

    # Re-attach full stops that were tokenized as standalone words.
    out_string = " ".join(out_list).replace(" .", ".")
    return out_string[0].upper() + out_string[1:]
if __name__ == '__main__':
    # The path of the source text is expected as the first argument.
    if len(argv) < 2:
        print('Usage:', argv[0], 'something_antani.txt')
    else:
        with open(argv[1]) as source_file:
            corpus = source_file.read()
        print(generate(corpus))