aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2016-07-24 22:42:09 +0200
committerNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2016-07-24 22:42:09 +0200
commit8994c7b5cf56f540c71c763173a8927569ba94b3 (patch)
tree5b7822bd827593f612c1e103df92001781ac72c6
parentf74d57fee040218af039f8157cf645d0902ad300 (diff)
downloadzero-of-one-8994c7b5cf56f540c71c763173a8927569ba94b3.zip
zero-of-one-8994c7b5cf56f540c71c763173a8927569ba94b3.tar.bz2
Experimenting with K Order Markovian chains.
I do not recommend using this branch ATM, it has not been tested.
-rw-r--r--README.md44
-rw-r--r--src/core/CMakeLists.txt1
-rw-r--r--src/core/assimilate.c263
-rw-r--r--src/core/create_sentences.c359
-rw-r--r--src/core/knowledge.c149
-rw-r--r--src/core/knowledge.h18
-rw-r--r--src/core/knowledge_types.h24
-rw-r--r--src/core/sequence.c129
-rw-r--r--src/io/error.h2
-rw-r--r--src/pervasive.h4
-rw-r--r--src/tool/CMakeLists.txt1
-rw-r--r--src/tool/sorted_list.c79
-rw-r--r--src/tool/sorted_list.h19
13 files changed, 787 insertions, 305 deletions
diff --git a/README.md b/README.md
index 12e1eac..e60f8a0 100644
--- a/README.md
+++ b/README.md
@@ -15,10 +15,54 @@ $ cd build
$ cmake ..
$ make
$ ./zero_of_one -h
+Usage: ./zero_of_one [option_1 option_2 ...] NICKNAME [ALIAS_1 ALIAS_2 ...]
+NICKNAME is used as the IRC nickname value.
+If NICKNAME or any ALIAS is found in an event, the program will reply.
+
+Available options:
+ [--data-filename | -df] FILENAME
+ Learn content from FILENAME before connecting.
+ Default: ./memory.txt.
+ [--new-data-filename | -ndf] FILENAME
+ Store new data learned in FILENAME.
+ Default: value of the --data-filename param.
+ [--irc-server-addr | -isa] IRC_SERVER_ADDR
+ Connect to this server address.
+ Default: irc.foonetic.net.
+ [--irc-server-port | -isp] IRC_SERVER_PORT
+ Connect to this server port.
+ Default: 6667.
+ [--irc-server-channel | -isc] IRC_SERVER_CHANNEL
+ Connect to this server's channel.
+ Default: #theborghivemind.
+ [--irc-username | -iu] USERNAME
+ Connect using this as 'username' (shown in WHOIS).
+ Default: zeroofone.
+ [--irc-realname | -ir] REALNAME
+ Connect using this as 'realname' (shown in WHOIS).
+ Default: Zero of One (bot).
+ [--reply-rate | -rr] REPLY_RATE
+ Chance to reply to an event (integer, range [0, 100]).
+ Default: 8.
```
+Note that if the NICKNAME has a uppercase character, it will never be matched by
+any inputs, as all inputs are converted to lowercase. A simple solution is to
+have a lowercase version of NICKNAME in the ALIAS list.
## Main Objectives
- POSIX compliance.
- Paranoia levels of error checking.
- Low RAM usage.
- Giggles.
+
+## Behavior
+- Replies when it reads messages containing a word matching its NICKNAME or one
+ of its ALIASes.
+- Replies to messages with a REPLY_RATE percent chance. The word used to
+ construct the reply is the less known one.
+- Reacts to user joining in. If the username is in enough samples, it is used
+ to construct the greeting, otherwise a random word is selected as starting
+ point.
+- Repetition is taken into account and augments the strength of the links.
+- Some punctuation symbols are identified (e.g. "example." will be understood
+ as "example" followed by ".").
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 2722355..a8ec17e 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -4,6 +4,7 @@ set(
${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c
${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c
${CMAKE_CURRENT_SOURCE_DIR}/create_sentences.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c
)
set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
diff --git a/src/core/assimilate.c b/src/core/assimilate.c
index 28f4dd4..49efe23 100644
--- a/src/core/assimilate.c
+++ b/src/core/assimilate.c
@@ -7,121 +7,109 @@
/** Functions to assimilate sentences using a ZoO_knowledge structure *********/
-static int link_to
+
+static int add_sequence
(
- ZoO_index links_count [const restrict static 1],
- ZoO_index * links_occurrences [const restrict static 1],
- ZoO_index * links [const restrict static 1],
- ZoO_index const target
+ ZoO_index links_count [const],
+ struct ZoO_knowledge_link * links [const],
+ ZoO_index const sequence [const restrict static ZoO_MARKOV_ORDER],
+ ZoO_index const target_i,
+ ZoO_index const offset
)
{
- ZoO_index i, * new_p;
+ ZoO_index link_index, i;
+ struct ZoO_knowledge_link * link;
+ ZoO_index * new_p;
- for (i = 0; i < *links_count; ++i)
+ if (ZoO_knowledge_get_link(links_count, links, (sequence + offset), &link_index) < 0)
{
- if ((*links)[i] == target)
- {
- if ((*links_occurrences)[i] == ZoO_INDEX_MAX)
- {
- ZoO_S_WARNING
- (
- "Maximum link occurrences count has been reached."
- );
+ return -1;
+ }
- return -1;
- }
+ link = (*links + link_index);
+ link->occurrences += 1;
- (*links_occurrences)[i] += 1;
+ for (i = 0; i < link->targets_count; ++i)
+ {
+ if (link->targets[i] == sequence[target_i])
+ {
+ link->targets_occurrences[i] += 1;
return 0;
}
}
- if (*links_count == ZoO_INDEX_MAX)
- {
- ZoO_S_WARNING("Maximum links count has been reached.");
-
- return -1;
- }
+ link->targets_count += 1;
new_p =
(ZoO_index *) realloc
(
- *links_occurrences,
- (
- (
- /* Safe: *links_count < ZoO_INDEX_MAX */
- (size_t) (*links_count + 1)
- )
- * sizeof(ZoO_index)
- )
+ (void *) link->targets,
+ (sizeof(ZoO_index) * link->targets_count)
);
if (new_p == (ZoO_index *) NULL)
{
- ZoO_S_ERROR("Could not reallocate a link occurrences list.");
+ link->targets_count -= 1;
+ /* TODO: err. */
return -1;
}
- new_p[*links_count] = 1;
-
- *links_occurrences = new_p;
+ link->targets = new_p;
+ link->targets[link->targets_count - 1] = sequence[target_i];
new_p =
(ZoO_index *) realloc
(
- *links,
- (
- (
- /* Safe: *links_count < ZoO_INDEX_MAX */
- (size_t) (*links_count + 1)
- ) * sizeof(ZoO_index)
- )
+ (void *) link->targets_occurrences,
+ (sizeof(ZoO_index) * link->targets_count)
);
if (new_p == (ZoO_index *) NULL)
{
- ZoO_S_ERROR("Could not reallocate a link list.");
+ link->targets_count -= 1;
+ /* TODO: err. */
return -1;
}
- new_p[*links_count] = target;
-
- *links = new_p;
-
- *links_count += 1;
+ link->targets_occurrences = new_p;
+ link->targets_occurrences[link->targets_count - 1] = 1;
return 0;
}
-static int link_words
+static int add_word_occurrence
(
struct ZoO_knowledge k [const restrict static 1],
- ZoO_index const a,
- ZoO_index const b
+ ZoO_index const sequence [const static ((ZoO_MARKOV_ORDER * 2) + 1)]
)
{
+ ZoO_index w;
int error;
+ w = sequence[ZoO_MARKOV_ORDER];
+
error =
- link_to
+ add_sequence
(
- &(k->words[a].forward_links_count),
- &(k->words[a].forward_links_occurrences),
- &(k->words[a].forward_links),
- b
+ &(k->words[w].forward_links_count),
+ &(k->words[w].forward_links),
+ sequence + (ZoO_MARKOV_ORDER + 1),
+ (ZoO_MARKOV_ORDER - 1),
+ 0
);
error =
(
- link_to
+ add_sequence
(
- &(k->words[b].backward_links_count),
- &(k->words[b].backward_links_occurrences),
- &(k->words[b].backward_links),
- a
+ &(k->words[w].backward_links_count),
+ &(k->words[w].backward_links),
+ sequence,
+ 0,
+ 1
)
| error
);
@@ -129,83 +117,144 @@ static int link_words
return error;
}
-int ZoO_knowledge_assimilate
+static int should_assimilate
(
- struct ZoO_knowledge k [const static 1],
struct ZoO_strings string [const restrict static 1],
ZoO_index const aliases_count,
const char * restrict aliases [const restrict static aliases_count]
)
{
- int error;
- ZoO_index curr_word, next_word;
- ZoO_index curr_word_id, next_word_id;
-
- curr_word = 0;
+ ZoO_index i;
+ /* Don't assimilate empty strings. */
if (string->words_count == 0)
{
return 0;
}
- for (curr_word = 0; curr_word < aliases_count; ++curr_word)
+ /* Don't assimilate things that start with our name. */
+ for (i = 0; i < aliases_count; ++i)
{
- if (ZoO_IS_PREFIX(aliases[curr_word], string->words[0]))
+ if (ZoO_IS_PREFIX(aliases[i], string->words[0]))
{
return 0;
}
}
- curr_word = 0;
+ return 1;
+}
+
+static int init_sequence
+(
+ struct ZoO_knowledge k [const static 1],
+ struct ZoO_strings string [const restrict static 1],
+ ZoO_index sequence [const restrict static ((ZoO_MARKOV_ORDER * 2) + 1)]
+)
+{
+ ZoO_index i;
+
+ sequence[0] = ZoO_WORD_START_OF_LINE;
- if (ZoO_knowledge_learn(k, string->words[curr_word], &curr_word_id) < 0)
+ for (i = 0; i < ZoO_MARKOV_ORDER; ++i)
+ {
+ sequence[ZoO_MARKOV_ORDER - i] = ZoO_WORD_START_OF_LINE;
+
+ if (i < string->words_count)
+ {
+ if
+ (
+ ZoO_knowledge_learn
+ (
+ k,
+ string->words[i],
+ (sequence + (ZoO_MARKOV_ORDER + i + 1))
+ ) < 0
+ )
+ {
+ return -1;
+ }
+ }
+ else
+ {
+ sequence[ZoO_MARKOV_ORDER + i + 1] = ZoO_WORD_END_OF_LINE;
+ }
+ }
+
+ return 0;
+}
+
+int ZoO_knowledge_assimilate
+(
+ struct ZoO_knowledge k [const static 1],
+ struct ZoO_strings string [const restrict static 1],
+ ZoO_index const aliases_count,
+ const char * restrict aliases [const restrict static aliases_count]
+)
+{
+ int error;
+ ZoO_index sequence[(ZoO_MARKOV_ORDER * 2) + 1];
+ ZoO_index next_word, new_word, new_word_id;
+
+ if (!should_assimilate(string, aliases_count, aliases))
+ {
+ return 0;
+ }
+
+ if (init_sequence(k, string, sequence) < 0)
{
return -1;
}
- if (link_words(k, ZoO_WORD_START_OF_LINE, curr_word_id) < 0)
+ if (add_word_occurrence(k, sequence) < 0)
{
error = -1;
- ZoO_WARNING
- (
- "Could not indicate that '"
- ZoO_CHAR_STRING_SYMBOL
- "' was the first word of the sentence.",
- string->words[0]
- );
- }
+ /* There's a pun... */
+ ZoO_S_WARNING("Could not add a link between words.");
- next_word = 1;
+ return -1;
+ }
error = 0;
- while (next_word < string->words_count)
+ next_word = 0;
+ new_word = ZoO_MARKOV_ORDER;
+
+ while (next_word <= string->words_count)
{
- /* prevents words [restrict], k [restrict] */
- if (ZoO_knowledge_learn(k, string->words[next_word], &next_word_id) < 0)
+ if (new_word < string->words_count)
{
- return -1;
+ /* prevents words [restrict], k [restrict] */
+ if (ZoO_knowledge_learn(k, string->words[new_word], &new_word_id) < 0)
+ {
+ return -1;
+ }
+ }
+ else
+ {
+ new_word_id = ZoO_WORD_END_OF_LINE;
}
- if (link_words(k, curr_word_id, next_word_id) < 0)
+ memmove
+ (
+ (void *) sequence,
+ (const void *) (sequence + 1),
+ /* Accepts 0. */
+ (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER * 2))
+ );
+
+ sequence[ZoO_MARKOV_ORDER * 2] = new_word_id;
+
+ if (add_word_occurrence(k, sequence) < 0)
{
error = -1;
- ZoO_WARNING
- (
- "Could not add a link between words '"
- ZoO_CHAR_STRING_SYMBOL
- "' and '"
- ZoO_CHAR_STRING_SYMBOL
- "'.",
- string->words[curr_word],
- string->words[next_word]
- );
+ /* There's a pun... */
+ ZoO_S_WARNING("Could not add a link between words.");
+
+ return -1;
}
- curr_word = next_word;
- curr_word_id = next_word_id;
/*
* Safe:
* - next_word < words_count
@@ -214,19 +263,7 @@ int ZoO_knowledge_assimilate
* next_word < ZoO_INDEX_MAX
*/
next_word += 1;
- }
-
- if (link_words(k, curr_word_id, ZoO_WORD_END_OF_LINE) < 0)
- {
- error = -1;
-
- ZoO_WARNING
- (
- "Could not indicate that '"
- ZoO_CHAR_STRING_SYMBOL
- "' was the last word of the sentence.",
- string->words[curr_word_id]
- );
+ new_word += 1;
}
return error;
diff --git a/src/core/create_sentences.c b/src/core/create_sentences.c
index 6b69111..a3640dc 100644
--- a/src/core/create_sentences.c
+++ b/src/core/create_sentences.c
@@ -18,11 +18,10 @@
* (== (sum links_occurrences) occurrences).
* (can_store ZoO_index (length links)).
*/
-static ZoO_index pick_an_index
+static ZoO_index pick_index
(
ZoO_index const occurrences,
- const ZoO_index links_occurrences [const restrict static 1],
- const ZoO_index links [const restrict static 1]
+ const ZoO_index links_occurrences [const restrict static 1]
)
{
ZoO_index result, accumulator, random_number;
@@ -62,13 +61,13 @@ static ZoO_index pick_an_index
}
/* Safe: (< result (length links)) */
- return links[result];
+ return result;
}
static unsigned char * extend_left
(
struct ZoO_knowledge k [const restrict static 1],
- ZoO_index word_id,
+ ZoO_index sequence [const restrict static ZoO_MARKOV_ORDER],
ZoO_char current_sentence [static 1],
size_t sentence_size [const restrict static 1],
ZoO_index credits [const static 1]
@@ -77,17 +76,7 @@ static unsigned char * extend_left
size_t addition_size;
struct ZoO_knowledge_word * w;
ZoO_char * next_sentence;
-
- w = (k->words + word_id);
-
- if
- (
- (w->special == ZoO_WORD_STARTS_SENTENCE)
- || (w->occurrences == 0)
- )
- {
- return current_sentence;
- }
+ ZoO_index j;
/* prevents current_sentence [restrict] */
next_sentence = current_sentence;
@@ -100,15 +89,8 @@ static unsigned char * extend_left
}
*credits -= 1;
- word_id =
- pick_an_index
- (
- w->occurrences,
- w->backward_links_occurrences,
- w->backward_links
- );
- w = (k->words + word_id);
+ w = (k->words + sequence[ZoO_MARKOV_ORDER - 1]);
switch (w->special)
{
@@ -205,13 +187,51 @@ static unsigned char * extend_left
/* prevents current_sentence [const] */
current_sentence = next_sentence;
+
+ memmove
+ (
+ (void *) (sequence + 1),
+ (const void *) sequence,
+ /* Accepts 0. */
+ (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER - 1))
+ );
+
+ if
+ (
+ ZoO_knowledge_find_link
+ (
+ w->backward_links_count,
+ w->backward_links,
+ (sequence + 1),
+ &j
+ )
+ < 0
+ )
+ {
+ ZoO_S_ERROR("Unexpectedly, no backtracking link was found.");
+
+ break;
+ }
+
+ sequence[0] =
+ w->backward_links[j].targets
+ [
+ pick_index
+ (
+ w->backward_links[j].occurrences,
+ w->backward_links[j].targets_occurrences
+ )
+ ];
+
+ /* prevents current_sentence [const] */
+ current_sentence = next_sentence;
}
}
static unsigned char * extend_right
(
struct ZoO_knowledge k [const restrict static 1],
- ZoO_index word_id,
+ ZoO_index sequence [const restrict static ZoO_MARKOV_ORDER],
ZoO_char current_sentence [static 1],
size_t sentence_size [const restrict static 1],
ZoO_index credits [const static 1]
@@ -220,17 +240,7 @@ static unsigned char * extend_right
size_t addition_size;
struct ZoO_knowledge_word * w;
ZoO_char * next_sentence;
-
- w = (k->words + word_id);
-
- if
- (
- (w->special == ZoO_WORD_ENDS_SENTENCE)
- || (w->occurrences == 0)
- )
- {
- return current_sentence;
- }
+ ZoO_index j;
/* prevents current_sentence [restrict] */
next_sentence = current_sentence;
@@ -244,15 +254,7 @@ static unsigned char * extend_right
*credits -= 1;
- word_id =
- pick_an_index
- (
- w->occurrences,
- w->forward_links_occurrences,
- w->forward_links
- );
-
- w = (k->words + word_id);
+ w = (k->words + sequence[0]);
switch (w->special)
{
@@ -340,95 +342,233 @@ static unsigned char * extend_right
/* prevents current_sentence [const] */
current_sentence = next_sentence;
+
+ memmove
+ (
+ (void *) sequence,
+ (const void *) (sequence + 1),
+ /* Accepts 0. */
+ (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER - 1))
+ );
+
+ if
+ (
+ ZoO_knowledge_find_link
+ (
+ w->forward_links_count,
+ w->forward_links,
+ sequence,
+ &j
+ )
+ < 0
+ )
+ {
+ ZoO_S_ERROR("Unexpectedly, no forward link was found.");
+
+ break;
+ }
+
+ sequence[ZoO_MARKOV_ORDER - 1] =
+ w->forward_links[j].targets
+ [
+ pick_index
+ (
+ w->forward_links[j].occurrences,
+ w->forward_links[j].targets_occurrences
+ )
+ ];
}
}
-int ZoO_knowledge_extend
+static ZoO_index select_first_word
(
struct ZoO_knowledge k [const static 1],
const struct ZoO_strings string [const],
- int const ignore_first_word,
- ZoO_char * result [const static 1]
+ int const ignore_first_word
)
{
- int word_found;
- size_t sentence_size;
- ZoO_index i, word_id, word_min_score, word_min_id, credits;
+ ZoO_index i, word_id, word_min_score, word_min_id;
+ ZoO_index word_found;
- credits = ZoO_MAX_REPLY_WORDS;
+ if (string == (struct ZoO_strings *) NULL)
+ {
+ return word_min_id = (rand() % k->words_count);
+ }
- if (string != (struct ZoO_strings *) NULL)
+ if (ignore_first_word)
+ {
+ i = 1;
+ }
+ else
{
- word_found = 0;
+ i = 0;
+ }
- if (ignore_first_word)
- {
- i = 1;
- }
- else
- {
- i = 0;
- }
+ word_found = 0;
- for (; i < string->words_count; ++i)
+ for (; i < string->words_count; ++i)
+ {
+ /* prevents k [restrict] */
+ if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0)
{
- /* prevents k [restrict] */
- if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0)
- {
- word_found = 1;
- word_min_score = k->words[word_min_id].occurrences;
+ word_found = 1;
+ word_min_score = k->words[word_min_id].occurrences;
- break;
- }
+ break;
}
+ }
+
+ if (word_found == 0)
+ {
+ return word_min_id = (rand() % k->words_count);
+ }
- if (word_found == 0)
+ for (; i < string->words_count; ++i)
+ {
+ if
+ (
+ (ZoO_knowledge_find(k, string->words[i], &word_id) == 0)
+ && (k->words[word_id].occurrences < word_min_score)
+ )
{
- word_min_id = (rand() % k->words_count);
- word_min_score = k->words[word_min_id].occurrences;
+ word_min_score = k->words[word_id].occurrences;
+ word_min_id = word_id;
}
+ }
- for (; i < string->words_count; ++i)
- {
- if
+ return word_min_id;
+}
+
+static void init_sequence
+(
+ struct ZoO_knowledge k [const static 1],
+ const struct ZoO_strings string [const],
+ int const ignore_first_word,
+ ZoO_index sequence [const static (ZoO_MARKOV_ORDER * 2) + 1]
+)
+{
+ ZoO_index i, j, accumulator, random_number;
+ struct ZoO_knowledge_word * fiw;
+
+ sequence[ZoO_MARKOV_ORDER] = select_first_word(k, string, ignore_first_word);
+
+ fiw = (k->words + sequence[ZoO_MARKOV_ORDER]);
+
+ for (i = 0; i < ZoO_MARKOV_ORDER; ++i)
+ {
+ sequence[ZoO_MARKOV_ORDER - i - 1] = ZoO_WORD_START_OF_LINE;
+ sequence[ZoO_MARKOV_ORDER + i + 1] = ZoO_WORD_END_OF_LINE;
+ }
+/*
+ i = 0;
+ accumulator = 0;
+
+ random_number = (((ZoO_index) rand()) % fiw->occurrences);
+
+ while (accumulator < random_number)
+ {
+ i += 1;
+ accumulator += fiw->forward_links[i].occurrences;
+ }
+*/
+ if (fiw->forward_links_count == 0)
+ {
+ ZoO_S_FATAL("First word has no forward links.");
+ }
+
+ i = (((ZoO_index) rand()) % fiw->forward_links_count);
+
+ memcpy
+ (
+ (void *) (sequence + ZoO_MARKOV_ORDER + 1),
+ fiw->forward_links[i].sequence,
+ sizeof(ZoO_index) * (ZoO_MARKOV_ORDER - 1)
+ );
+
+ sequence[ZoO_MARKOV_ORDER * 2] =
+ fiw->forward_links[i].targets
+ [
+ pick_index
(
- (ZoO_knowledge_find(k, string->words[i], &word_id) == 0)
- && (k->words[word_id].occurrences < word_min_score)
+ fiw->forward_links[i].occurrences,
+ fiw->forward_links[i].targets_occurrences
)
- {
- word_min_score = k->words[word_id].occurrences;
- word_min_id = word_id;
- }
- }
- }
- else
+ ];
+
+ for (i = 1; i <= ZoO_MARKOV_ORDER; ++i)
{
- word_min_id = (rand() % k->words_count);
+ fiw = (k->words + sequence[(ZoO_MARKOV_ORDER * 2) - i]);
+
+ if
+ (
+ ZoO_knowledge_find_link
+ (
+ fiw->backward_links_count,
+ fiw->backward_links,
+ sequence + (ZoO_MARKOV_ORDER - i + 1),
+ &j
+ )
+ < 0
+ )
+ {
+ ZoO_S_ERROR("Unexpectedly, no back link was found.");
+
+ break;
+ }
+
+ sequence[ZoO_MARKOV_ORDER - i] =
+ fiw->backward_links[j].targets
+ [
+ pick_index
+ (
+ fiw->backward_links[j].occurrences,
+ fiw->backward_links[j].targets_occurrences
+ )
+ ];
}
+}
+
+int ZoO_knowledge_extend
+(
+ struct ZoO_knowledge k [const static 1],
+ const struct ZoO_strings string [const],
+ int const ignore_first_word,
+ ZoO_char * result [const static 1]
+)
+{
+ int word_found;
+ size_t sentence_size;
+ ZoO_index sequence[(ZoO_MARKOV_ORDER * 2) + 1];
+ ZoO_index first_word, credits;
+
+ credits = ZoO_MAX_REPLY_WORDS;
+
+ init_sequence(k, string, ignore_first_word, sequence);
+ first_word = sequence[ZoO_MARKOV_ORDER];
/* 3: 2 spaces + '\0' */
/* FIXME: not overflow-safe */
- switch (k->words[word_min_id].special)
+ switch (k->words[first_word].special)
{
case ZoO_WORD_REMOVES_LEFT_SPACE:
case ZoO_WORD_REMOVES_RIGHT_SPACE:
/* word + ' ' + '\0' */
- sentence_size = (strlen(k->words[word_min_id].word) + 2);
+ sentence_size = (strlen(k->words[first_word].word) + 2);
break;
case ZoO_WORD_HAS_NO_EFFECT:
/* word + ' ' * 2 + '\0' */
- sentence_size = (strlen(k->words[word_min_id].word) + 3);
+ sentence_size = (strlen(k->words[first_word].word) + 3);
break;
default:
ZoO_WARNING
(
"'%s' was unexpectedly selected as pillar.",
- k->words[word_min_id].word
+ k->words[first_word].word
);
/* word + '[' + ']' + ' ' * 2 + '\0' */
- sentence_size = (strlen(k->words[word_min_id].word) + 5);
+ sentence_size = (strlen(k->words[first_word].word) + 5);
break;
}
@@ -441,7 +581,7 @@ int ZoO_knowledge_extend
return -2;
}
- switch (k->words[word_min_id].special)
+ switch (k->words[first_word].special)
{
case ZoO_WORD_REMOVES_LEFT_SPACE:
snprintf
@@ -449,7 +589,7 @@ int ZoO_knowledge_extend
*result,
sentence_size,
ZoO_CHAR_STRING_SYMBOL " ",
- k->words[word_min_id].word
+ k->words[first_word].word
);
break;
@@ -459,7 +599,7 @@ int ZoO_knowledge_extend
*result,
sentence_size,
" " ZoO_CHAR_STRING_SYMBOL,
- k->words[word_min_id].word
+ k->words[first_word].word
);
break;
@@ -469,7 +609,7 @@ int ZoO_knowledge_extend
*result,
sentence_size,
" " ZoO_CHAR_STRING_SYMBOL " ",
- k->words[word_min_id].word
+ k->words[first_word].word
);
break;
@@ -479,27 +619,36 @@ int ZoO_knowledge_extend
*result,
sentence_size,
" [" ZoO_CHAR_STRING_SYMBOL "] ",
- k->words[word_min_id].word
+ k->words[first_word].word
);
break;
}
- if ((word_min_score == 0) || (credits == 0))
- {
- return 0;
- }
-
- --credits;
-
/* prevents result [restrict] */
- *result = extend_left(k, word_min_id, *result, &sentence_size, &credits);
+ *result =
+ extend_right
+ (
+ k,
+ (sequence + ZoO_MARKOV_ORDER + 1),
+ *result,
+ &sentence_size,
+ &credits
+ );
if (*result == (ZoO_char *) NULL)
{
return -2;
}
- *result = extend_right(k, word_min_id, *result, &sentence_size, &credits);
+ *result =
+ extend_left
+ (
+ k,
+ sequence,
+ *result,
+ &sentence_size,
+ &credits
+ );
if (*result == (ZoO_char *) NULL)
{
diff --git a/src/core/knowledge.c b/src/core/knowledge.c
index 9b4e250..a2bf708 100644
--- a/src/core/knowledge.c
+++ b/src/core/knowledge.c
@@ -3,6 +3,7 @@
#include <stdint.h> /* defines SIZE_MAX */
#include "../io/error.h"
+#include "../tool/sorted_list.h"
#include "knowledge.h"
@@ -35,6 +36,28 @@ const ZoO_char const ZoO_knowledge_forbidden_chars[8]=
'>'
};
+static int cmp_word
+(
+ const void * const a,
+ const void * const b,
+ const void * const other
+)
+{
+ ZoO_char const * word;
+ ZoO_index const * sorted_index;
+ struct ZoO_knowledge const * k;
+
+ word = (ZoO_char const *) a;
+ sorted_index = (ZoO_index const *) b;
+ k = (struct ZoO_knowledge *) other;
+
+ return strcmp
+ (
+ (const char *) word,
+ (const char *) k->words[*sorted_index].word
+ );
+}
+
/* See "knowledge.h". */
int ZoO_knowledge_find
(
@@ -43,74 +66,31 @@ int ZoO_knowledge_find
ZoO_index result [const restrict static 1]
)
{
- int cmp;
- ZoO_index i, current_min, current_max;
+ ZoO_index r;
- /* This is a binary search. */
-
- if (k->words_count < 1)
+ if
+ (
+ ZoO_sorted_list_index_of
+ (
+ k->words_count,
+ (void const *) k->sorted_indices,
+ (void const *) word,
+ sizeof(ZoO_index),
+ cmp_word,
+ (void const *) k,
+ &r
+ )
+ == 0
+ )
{
- *result = 0;
+ *result = k->sorted_indices[r];
- return -1;
+ return 0;
}
- current_min = 0;
-
- /* Safe: (>= k->words_count 1) */
- current_max = (k->words_count - 1);
-
- for (;;)
- {
- /* FIXME: overflow-safe? */
- i = ((current_min + current_max) / 2);
-
- if (i == k->words_count)
- {
- *result = k->words_count;
-
- return -1;
- }
-
- cmp =
- /* XXX: Assumed to be compatible with ZoO_char */
- strcmp
- (
- (char *) word,
- (const char *) k->words[k->sorted_indices[i]].word
- );
-
- if (cmp > 0)
- {
- if ((current_min > current_max))
- {
- *result = (i + 1);
-
- return -1;
- }
+ *result = r;
- /* FIXME: overflow-safe? */
- current_min = (i + 1);
- }
- else if (cmp < 0)
- {
- if ((current_min > current_max) || (i == 0))
- {
- *result = i;
-
- return -1;
- }
-
- /* overflow-safe */
- current_max = (i - 1);
- }
- else
- {
- *result = k->sorted_indices[i];
-
- return 0;
- }
- }
+ return -1;
}
static void word_init (struct ZoO_knowledge_word w [const restrict static 1])
@@ -121,10 +101,8 @@ static void word_init (struct ZoO_knowledge_word w [const restrict static 1])
w->occurrences = 1;
w->forward_links_count = 0;
w->backward_links_count = 0;
- w->forward_links_occurrences = (ZoO_index *) NULL;
- w->backward_links_occurrences = (ZoO_index *) NULL;
- w->forward_links = (ZoO_index *) NULL;
- w->backward_links = (ZoO_index *) NULL;
+ w->forward_links = (struct ZoO_knowledge_link *) NULL;
+ w->backward_links = (struct ZoO_knowledge_link *) NULL;
}
/*
@@ -209,6 +187,21 @@ int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1])
return 0;
}
+static void finalize_links
+(
+ ZoO_index const count,
+ struct ZoO_knowledge_link links [const restrict static count]
+)
+{
+ ZoO_index i;
+
+ for (i = 0; i < count; ++i)
+ {
+ free((void *) links[i].targets_occurrences);
+ free((void *) links[i].targets);
+ }
+}
+
/*
* Frees all the memory used by {w}, but not {w} itself.
* The values of {w}'s members are set to reflect the changes.
@@ -225,32 +218,22 @@ static void finalize_word
w->word = (ZoO_char *) NULL;
}
- if (w->forward_links_occurrences != (ZoO_index *) NULL)
+ if (w->forward_links != (struct ZoO_knowledge_link *) NULL)
{
- free((void *) w->forward_links_occurrences);
-
- w->forward_links_occurrences = (ZoO_index *) NULL;
- }
+ finalize_links(w->forward_links_count, w->forward_links);
- if (w->backward_links_occurrences != (ZoO_index *) NULL)
- {
- free((void *) w->backward_links_occurrences);
-
- w->backward_links_occurrences = (ZoO_index *) NULL;
- }
-
- if (w->forward_links != (ZoO_index *) NULL)
- {
free((void *) w->forward_links);
- w->forward_links = (ZoO_index *) NULL;
+ w->forward_links = (struct ZoO_knowledge_link *) NULL;
}
- if (w->backward_links != (ZoO_index *) NULL)
+ if (w->backward_links != (struct ZoO_knowledge_link *) NULL)
{
+ finalize_links(w->backward_links_count, w->backward_links);
+
free((void *) w->backward_links);
- w->backward_links = (ZoO_index *) NULL;
+ w->backward_links = (struct ZoO_knowledge_link *) NULL;
}
w->forward_links_count = 0;
diff --git a/src/core/knowledge.h b/src/core/knowledge.h
index c9ea342..93f5f49 100644
--- a/src/core/knowledge.h
+++ b/src/core/knowledge.h
@@ -75,4 +75,22 @@ int ZoO_knowledge_extend
ZoO_char * result [const static 1]
);
+int ZoO_knowledge_find_link
+(
+ ZoO_index const links_count,
+ struct ZoO_knowledge_link links [const],
+ ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE],
+ ZoO_index result [const restrict static 1]
+);
+
+/* Create it if it's not found. */
+int ZoO_knowledge_get_link
+(
+ ZoO_index links_count [const],
+ struct ZoO_knowledge_link * links [const],
+ ZoO_index const sequence [const restrict static ZoO_S_LINK_SIZE],
+ ZoO_index result [const restrict static 1]
+);
+
+
#endif
diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h
index f2e8161..8f541e7 100644
--- a/src/core/knowledge_types.h
+++ b/src/core/knowledge_types.h
@@ -6,6 +6,14 @@
#define ZoO_WORD_START_OF_LINE 0
#define ZoO_WORD_END_OF_LINE 1
+#if ZoO_MARKOV_ORDER == 1
+ #define ZoO_SEQUENCE_SIZE 1
+#else
+ #define ZoO_SEQUENCE_SIZE ZoO_MARKOV_ORDER - 1
+#endif
+
+#define ZoO_S_LINK_SIZE (ZoO_SEQUENCE_SIZE + 1)
+
/* XXX: are we as close to immutable as we want to be? */
extern unsigned int const ZoO_knowledge_punctuation_chars_count;
extern const ZoO_char const ZoO_knowledge_punctuation_chars[7];
@@ -22,6 +30,15 @@ enum ZoO_knowledge_special_effect
ZoO_WORD_REMOVES_RIGHT_SPACE
};
+struct ZoO_knowledge_link
+{
+ ZoO_index sequence[ZoO_SEQUENCE_SIZE];
+ ZoO_index occurrences;
+ ZoO_index targets_count;
+ ZoO_index * targets_occurrences;
+ ZoO_index * targets;
+};
+
struct ZoO_knowledge_word
{
size_t word_size;
@@ -30,12 +47,11 @@ struct ZoO_knowledge_word
ZoO_index occurrences;
ZoO_index forward_links_count;
ZoO_index backward_links_count;
- ZoO_index * forward_links_occurrences;
- ZoO_index * backward_links_occurrences;
- ZoO_index * forward_links;
- ZoO_index * backward_links;
+ struct ZoO_knowledge_link * forward_links;
+ struct ZoO_knowledge_link * backward_links;
};
+
struct ZoO_knowledge
{
ZoO_index words_count;
diff --git a/src/core/sequence.c b/src/core/sequence.c
new file mode 100644
index 0000000..0f4043e
--- /dev/null
+++ b/src/core/sequence.c
@@ -0,0 +1,129 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "../io/error.h"
+#include "../tool/sorted_list.h"
+
+#include "knowledge.h"
+
+static int cmp_seq_link
+(
+ const void * const a,
+ const void * const b,
+ const void * const other
+)
+{
+ ZoO_index j;
+ ZoO_index * sequence;
+ struct ZoO_knowledge_link * link;
+
+ sequence = (ZoO_index *) a;
+ link = (struct ZoO_knowledge_link *) b;
+
+ for (j = 0; j < ZoO_SEQUENCE_SIZE; ++j)
+ {
+ if (sequence[j] < link->sequence[j])
+ {
+ return -1;
+ }
+ else if (sequence[j] > link->sequence[j])
+ {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int ZoO_knowledge_find_link
+(
+ ZoO_index const links_count,
+ struct ZoO_knowledge_link links [const],
+ ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE],
+ ZoO_index result [const restrict static 1]
+)
+{
+ return
+ ZoO_sorted_list_index_of
+ (
+ links_count,
+ (void const *) links,
+ (void const *) sequence,
+ sizeof(struct ZoO_knowledge_link),
+ cmp_seq_link,
+ (void const *) NULL,
+ result
+ );
+}
+
+int ZoO_knowledge_get_link
+(
+ ZoO_index links_count [const],
+ struct ZoO_knowledge_link * links [const],
+ ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE],
+ ZoO_index result [const restrict static 1]
+)
+{
+ struct ZoO_knowledge_link * new_p;
+
+ if
+ (
+ ZoO_sorted_list_index_of
+ (
+ *links_count,
+ (void const *) *links,
+ (void const *) sequence,
+ sizeof(struct ZoO_knowledge_link),
+ cmp_seq_link,
+ (void const *) NULL,
+ result
+ ) == 0
+ )
+ {
+ return 0;
+ }
+
+ *links_count += 1;
+
+ new_p =
+ (struct ZoO_knowledge_link *) realloc
+ (
+ (void *) *links,
+ (sizeof(struct ZoO_knowledge_link) * (*links_count))
+ );
+
+ if (new_p == (struct ZoO_knowledge_link *) NULL)
+ {
+ *links_count -= 1;
+
+ return -1;
+ }
+
+ if (*result < (*links_count - 1))
+ {
+ memmove(
+ (void *) (new_p + *result + 1),
+ (const void *) (new_p + *result),
+ (sizeof(struct ZoO_knowledge_link) * (*links_count - 1 - *result))
+ );
+ }
+
+ *links = new_p;
+
+ new_p += *result;
+
+ memcpy
+ (
+ (void *) new_p->sequence,
+ (void const *) sequence,
+ /* can be zero */
+ (sizeof(ZoO_index) * ZoO_SEQUENCE_SIZE)
+ );
+
+ new_p->occurrences = 0;
+ new_p->targets_count = 0;
+ new_p->targets_occurrences = (ZoO_index *) NULL;
+ new_p->targets = (ZoO_index *) NULL;
+
+ return 0;
+}
diff --git a/src/io/error.h b/src/io/error.h
index e4267a0..be7359f 100644
--- a/src/io/error.h
+++ b/src/io/error.h
@@ -23,6 +23,8 @@
#define ZoO_DEBUG_LEARNING (0 || ZoO_DEBUG_ALL)
#endif
+#define ZoO_DEBUG_NETWORK 1
+
#ifndef ZoO_DEBUG_NETWORK
#define ZoO_DEBUG_NETWORK (0 || ZoO_DEBUG_ALL)
#endif
diff --git a/src/pervasive.h b/src/pervasive.h
index d2b0344..4cc43fe 100644
--- a/src/pervasive.h
+++ b/src/pervasive.h
@@ -39,6 +39,10 @@
#define ZoO_DEFAULT_REPLY_RATE 8
#endif
+#ifndef ZoO_MARKOV_ORDER
+ #define ZoO_MARKOV_ORDER 2
+#endif
+
typedef unsigned int ZoO_index;
#define ZoO_INDEX_MAX UINT_MAX
diff --git a/src/tool/CMakeLists.txt b/src/tool/CMakeLists.txt
index 3a1d947..3ad122b 100644
--- a/src/tool/CMakeLists.txt
+++ b/src/tool/CMakeLists.txt
@@ -1,6 +1,7 @@
set(
SRC_FILES ${SRC_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/strings.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/sorted_list.c
)
set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
diff --git a/src/tool/sorted_list.c b/src/tool/sorted_list.c
new file mode 100644
index 0000000..880a4b5
--- /dev/null
+++ b/src/tool/sorted_list.c
@@ -0,0 +1,79 @@
+#include "./sorted_list.h"
+
+int ZoO_sorted_list_index_of
+(
+ ZoO_index const list_length,
+ const void * const sorted_list,
+ const void * const elem,
+ size_t const type_size,
+ int (*compare) (const void *, const void *, const void *),
+ const void * const other,
+ ZoO_index result [const restrict static 1]
+)
+{
+ int cmp;
+ ZoO_index i, current_min, current_max;
+ const char * sorted_list_access;
+
+ sorted_list_access = (char *) sorted_list;
+
+ /* This is a binary search. */
+
+ if (list_length == 0)
+ {
+ *result = 0;
+
+ return -1;
+ }
+
+ current_min = 0;
+
+ current_max = (list_length - 1);
+
+ for (;;)
+ {
+ /* FIXME: overflow-safe? */
+ i = ((current_min + current_max) / 2);
+
+ if (i == list_length)
+ {
+ /* FIXME: I don't see how this one can be true */
+ *result = list_length;
+
+ return -1;
+ }
+
+ cmp = compare(elem, (sorted_list_access + (i * type_size)), other);
+
+ if (cmp > 0)
+ {
+ if ((current_min > current_max))
+ {
+ *result = (i + 1);
+
+ return -1;
+ }
+
+ /* FIXME: overflow-safe? */
+ current_min = (i + 1);
+ }
+ else if (cmp < 0)
+ {
+ if ((current_min > current_max) || (i == 0))
+ {
+ *result = i;
+
+ return -1;
+ }
+
+ /* overflow-safe */
+ current_max = (i - 1);
+ }
+ else
+ {
+ *result = i;
+
+ return 0;
+ }
+ }
+}
diff --git a/src/tool/sorted_list.h b/src/tool/sorted_list.h
new file mode 100644
index 0000000..72bd893
--- /dev/null
+++ b/src/tool/sorted_list.h
@@ -0,0 +1,19 @@
+#ifndef _ZoO_TOOL_SORTED_LIST_H_
+#define _ZoO_TOOL_SORTED_LIST_H_
+
+#include <stdlib.h>
+
+#include "../pervasive.h"
+
+int ZoO_sorted_list_index_of
+(
+ ZoO_index const list_length,
+ const void * const sorted_list,
+ const void * const elem,
+ size_t const type_size,
+ int (*compare) (const void *, const void *, const void *),
+ const void * const other,
+ ZoO_index result [const restrict static 1]
+);
+
+#endif