#!/usr/bin/env python3
# Provenance: markov.py (new file, mode 100755) as added by commit
# 9c8bf81f6fb492129e772d95fe2e2d147e3a1c21 "Add Markov chain"
# Author: User Identifier
# Date:   Sat Jul 1 19:09:02 2017 +0200
"""Generate a pseudo-random sentence from a text with a word-level Markov chain.

Usage: markov.py something_antani.txt
"""
from sys import argv
from string import ascii_letters
from random import choice

# Characters kept by get_word_list(); everything else becomes a separator.
# The apostrophe and the full stop are kept so that contractions stay whole
# and a trailing "." can mark the end of a sentence in generate().
_ALLOWED_CHARS = frozenset(
    ascii_letters + "áàéèíìóòúù" + "ÁÀÉÈÍÌÓÒÚÙ" + "'."
)


def get_word_list(text):
    """Return the lowercased words of *text*, in order.

    Every character that is not an ASCII letter, one of the accented vowels
    listed in ``_ALLOWED_CHARS``, an apostrophe or a full stop is replaced
    by a space; the result is then split on whitespace.
    """
    cleaned = "".join(
        char.lower() if char in _ALLOWED_CHARS else " " for char in text
    )
    return cleaned.split()


def get_associations(word_list):
    """Map each word to the list of words that directly follow it.

    Followers are kept in order of appearance and repeated as often as they
    occur, so a uniform random pick from the list is weighted by frequency.
    A word that only appears as the last element gets no key of its own.

    ['a', 'b', 'a', 'c'] becomes {'a': ['b', 'c'], 'b': ['a']}
    """
    associations = {}
    # Pair every word with its successor; the last word has none.
    for word, follower in zip(word_list, word_list[1:]):
        associations.setdefault(word, []).append(follower)
    return associations


def generate(text, max_words=100):
    """Build a random sentence out of the words found in *text*.

    Starting from a random word, each next word is drawn at random from the
    words that followed the current one in *text*. Generation stops after
    *max_words* words or as soon as a word ending with "." is emitted.

    Returns the sentence with its first character upper-cased, or an empty
    string when *text* contains no usable word or *max_words* is not
    positive.
    """
    word_list = get_word_list(text)
    # choice() raises IndexError on an empty sequence, and the final
    # capitalisation needs at least one character.
    if not word_list or max_words <= 0:
        return ""

    associations = get_associations(word_list)
    out_list = []
    current_word = choice(word_list)

    for _ in range(max_words):
        out_list.append(current_word)
        if current_word.endswith("."):
            break
        # A word with no recorded follower (it only occurs at the very end
        # of the text) falls back to any word of the text.
        current_word = choice(associations.get(current_word, word_list))

    # A standalone "." token is glued back onto the preceding word.
    out_string = " ".join(out_list).replace(" .", ".")
    return out_string[0].upper() + out_string[1:]


if __name__ == '__main__':
    if argv[1:]:
        with open(argv[1]) as f:
            print(generate(f.read()))
    else:
        print('Usage:', argv[0], 'something_antani.txt')