using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.Serialization;
using System.Xml;

namespace EchoChamber.Library
{
    /// <summary>
    /// Builds a Markov-chain style index from source text and generates new text from it.
    /// Each prefix of 1..N consecutive words (wrapped in an <c>EchoToken</c>) maps to the
    /// list of words observed to follow that prefix; generation walks the table randomly.
    /// </summary>
    public class Indexer
    {
        public Indexer()
        {
            TokenTable = new Dictionary<EchoToken, List<string>>();
            SourceData = new List<string>();
        }

        /// <summary>Human-readable name for this index (used by <see cref="ToString"/> and <see cref="Merge"/>).</summary>
        public string Name { get; set; }

        /// <summary>Maps each observed word-prefix token to every word seen following it. Duplicates are kept on purpose: repeated suffixes weight the random selection.</summary>
        public Dictionary<EchoToken, List<string>> TokenTable { get; set; }

        protected Random _random = new Random();

        /// <summary>The original trimmed, non-empty input lines, kept for reference and merging.</summary>
        public List<string> SourceData { get; set; }

        /// <summary>
        /// Splits <paramref name="content"/> into lines and words and records, for every
        /// prefix length from 1 up to <paramref name="maxPrefixLength"/>, which word follows
        /// each run of words. A lower prefix length is more random and incoherent; higher
        /// values make longer runs from the original source more likely.
        /// </summary>
        /// <param name="content">Raw text; split on '\n', blank lines are skipped.</param>
        /// <param name="maxPrefixLength">Largest prefix length to index (default 3, matching the original behavior).</param>
        public void Index(string content, int maxPrefixLength = 3)
        {
            foreach (string line in content.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries))
            {
                string trimmedLine = line.Trim();
                if (string.IsNullOrEmpty(trimmedLine))
                {
                    continue;
                }
                SourceData.Add(trimmedLine);

                for (int prefixLength = 1; prefixLength <= maxPrefixLength; prefixLength++)
                {
                    Queue<string> prefixQueue = new Queue<string>();
                    // Preload the queue with terminators so line-starting words are reachable
                    // from an all-terminator prefix.
                    for (int i = 0; i < prefixLength; i++)
                    {
                        prefixQueue.Enqueue(EchoToken.TerminatorWord);
                    }

                    foreach (string word in trimmedLine.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                    {
                        string trimmedWord = word.Trim();
                        if (prefixQueue.Count == prefixLength)
                        {
                            AddPrefix(prefixQueue, trimmedWord);
                            prefixQueue.Dequeue();
                        }
                        prefixQueue.Enqueue(trimmedWord);
                    }

                    // And record the terminator after the final prefix so generation can stop.
                    AddPrefix(prefixQueue, EchoToken.TerminatorWord);
                }
            }
        }

        /// <summary>Records <paramref name="suffix"/> as a word observed after the prefix currently in <paramref name="prefixQueue"/>.</summary>
        protected void AddPrefix(Queue<string> prefixQueue, string suffix)
        {
            EchoToken token = new EchoToken(prefixQueue.ToArray());
            // Single lookup instead of ContainsKey + indexer.
            List<string> suffixes;
            if (!TokenTable.TryGetValue(token, out suffixes))
            {
                suffixes = new List<string>();
                TokenTable.Add(token, suffixes);
            }
            suffixes.Add(suffix);
        }

        /// <summary>
        /// Picks a random recorded follower for the prefix in <paramref name="prefixQueue"/>,
        /// or <c>EchoToken.TerminatorWord</c> when the prefix was never indexed.
        /// </summary>
        protected string GetSuffix(Queue<string> prefixQueue)
        {
            EchoToken token = new EchoToken(prefixQueue.ToArray());
            List<string> suffixList;
            if (TokenTable.TryGetValue(token, out suffixList))
            {
                return suffixList[_random.Next(suffixList.Count)];
            }
            return EchoToken.TerminatorWord;
        }

        /// <summary>Generates up to <paramref name="terms"/> words using prefixes of <paramref name="prefixLength"/> words, starting from a line boundary.</summary>
        public string Generate(int terms, int prefixLength)
        {
            return Generate(terms, prefixLength, null);
        }

        /// <summary>
        /// Generates up to <paramref name="terms"/> words using prefixes of
        /// <paramref name="prefixLength"/> words. If <paramref name="initialText"/> is
        /// supplied, its trailing words seed the prefix and it is prepended to the output.
        /// Generation stops early when a terminator is reached.
        /// </summary>
        public string Generate(int terms, int prefixLength, string initialText)
        {
            Queue<string> prefixQueue = new Queue<string>();
            // Preload the queue with terminators to get line starters.
            for (int i = 0; i < prefixLength; i++)
            {
                prefixQueue.Enqueue(EchoToken.TerminatorWord);
            }

            if (!string.IsNullOrEmpty(initialText))
            {
                // Slide each seed word through the queue; only the last prefixLength words remain.
                foreach (string initialWord in initialText.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    prefixQueue.Enqueue(initialWord);
                    prefixQueue.Dequeue();
                }
            }

            List<string> gibberish = new List<string>();
            if (!string.IsNullOrEmpty(initialText))
            {
                gibberish.Add(initialText);
            }

            for (int i = 0; i < terms; i++)
            {
                string suffix = GetSuffix(prefixQueue);
                if (suffix == EchoToken.TerminatorWord)
                {
                    break;
                }
                gibberish.Add(suffix);
                prefixQueue.Dequeue();
                prefixQueue.Enqueue(suffix);
            }

            return string.Join(" ", gibberish.ToArray());
        }

        /// <summary>Serializes this index to <paramref name="file"/> as XML via <see cref="DataContractSerializer"/>.</summary>
        public void Save(string file)
        {
            using (XmlWriter writer = XmlWriter.Create(file))
            {
                DataContractSerializer serializer = new DataContractSerializer(typeof(Indexer));
                serializer.WriteObject(writer, this);
            }
        }

        /// <summary>Deserializes an <see cref="Indexer"/> previously written by <see cref="Save"/>.</summary>
        public static Indexer Load(string file)
        {
            using (XmlReader reader = XmlReader.Create(file))
            {
                DataContractSerializer serializer = new DataContractSerializer(typeof(Indexer));
                return (Indexer)serializer.ReadObject(reader);
            }
        }

        /// <summary>
        /// Combines several indexers into a new one: token tables are unioned (suffix lists
        /// concatenated, preserving selection weights), source data is concatenated, and the
        /// merged name is "Merged" plus each source name separated by '|'.
        /// </summary>
        public static Indexer Merge(params Indexer[] indexers)
        {
            Indexer merged = new Indexer();
            merged.Name = "Merged";
            foreach (Indexer toMerge in indexers)
            {
                merged.Name += "|" + toMerge.Name;
                foreach (KeyValuePair<EchoToken, List<string>> record in toMerge.TokenTable)
                {
                    // Single lookup instead of ContainsKey + indexer.
                    List<string> suffixes;
                    if (!merged.TokenTable.TryGetValue(record.Key, out suffixes))
                    {
                        suffixes = new List<string>();
                        merged.TokenTable.Add(record.Key, suffixes);
                    }
                    suffixes.AddRange(record.Value);
                }
                merged.SourceData.AddRange(toMerge.SourceData);
            }
            return merged;
        }

        public override string ToString()
        {
            return Name;
        }
    }
}