#!/usr/bin/env python3
"""Generate a random pseudo-sentence from the words of a text file.

The text is split into lowercase words, a table of "which words follow
which" is built, and a sentence is produced by walking that table at random.
"""
from sys import argv
from string import ascii_letters
from random import choice

# Characters kept by get_word_list(); every other character is treated as a
# word separator. Built once as a set so the per-character test is O(1).
_ALLOWED_CHARS = frozenset(
    ascii_letters + "áàéèíìóòúù" + "ÁÀÉÈÍÌÓÒÚÙ" + "'."
)


def get_word_list(text):
    """Return the lowercase words of *text*, in order.

    ASCII letters, the accented vowels listed in ``_ALLOWED_CHARS``, the
    apostrophe and the full stop are kept (lowercased); any other character
    is replaced by a space before splitting on whitespace. A full stop
    therefore stays attached to the word it follows (``"end."``).
    """
    cleaned = "".join(
        c.lower() if c in _ALLOWED_CHARS else " " for c in text
    )
    return cleaned.split()


def get_associations(word_list):
    """Map each word to the list of words that directly follow it.

    Followers are listed in order of appearance and repeated as often as
    they occur, e.g. ``['a', 'b', 'a', 'c']`` becomes
    ``{'a': ['b', 'c'], 'b': ['a']}``. A word that only appears as the very
    last element has no entry.
    """
    associations = {}
    # Pair every word with its successor; zip stops before the last word.
    for word, follower in zip(word_list, word_list[1:]):
        associations.setdefault(word, []).append(follower)
    return associations


def generate(text, max_words=100):
    """Build a random sentence from the words found in *text*.

    Starting from a random word, each next word is picked at random among
    the words that followed the current one in *text*. Generation stops
    after *max_words* words or as soon as a word ending with a full stop is
    emitted. The first character of the result is upper-cased.

    Returns an empty string when *text* contains no usable word or when
    *max_words* is not positive.
    """
    word_list = get_word_list(text)
    # Guard: choice() raises IndexError on an empty list, and so would
    # indexing the first character of an empty result.
    if not word_list or max_words <= 0:
        return ""

    associations = get_associations(word_list)
    out_list = []
    current_word = choice(word_list)
    for _ in range(max_words):
        out_list.append(current_word)
        if current_word.endswith("."):
            break
        # A word with no recorded follower falls back to the whole text.
        current_word = choice(associations.get(current_word, word_list))

    # A lone "." token is glued back onto the preceding word.
    out_string = " ".join(out_list).replace(" .", ".")
    return out_string[0].upper() + out_string[1:]


if __name__ == '__main__':
    if argv[1:]:
        # Explicit encoding: the accented letters handled above must not
        # depend on the platform's locale encoding.
        with open(argv[1], encoding="utf-8") as f:
            print(generate(f.read()))
    else:
        print('Usage:', argv[0], 'something_antani.txt')