diff --git a/include/damage.h b/include/damage.h
new file mode 100644
index 0000000..bd976c3
--- /dev/null
+++ b/include/damage.h
@@ -0,0 +1,13 @@
+#ifndef DAMAGE_H
+# define DAMAGE_H
+
+typedef enum {	// damage categories; enumerators take the default values 0 and 1
+	PHYSICAL_DMG,
+	MAGICA_DMG,	// NOTE(review): possibly a typo for "MAGICAL_DMG"; name kept because other files may use it
+}	DAMAGE_TYPE;
+
+typedef struct {	// description of one damage instance
+	DAMAGE_TYPE	type;	// damage category; also gives the struct the named member that ISO C requires
+}	Damage;
+
+#endif
diff --git a/include/octree.h b/include/data_struct/octree.h
similarity index 100%
rename from include/octree.h
rename to include/data_struct/octree.h
diff --git a/include/queue.h b/include/data_struct/queue.h
similarity index 100%
rename from include/queue.h
rename to include/data_struct/queue.h
diff --git a/include/engine.h b/include/engine.h
index 62fe167..81fabb8 100644
--- a/include/engine.h
+++ b/include/engine.h
@@ -1,8 +1,90 @@
 #ifndef ENGINE_H
 # define ENGINE_H
 
-#include <struct.h>
 #include <render.h>
+#include <event.h>
+#include <entity.h>
+#include <damage.h>
+#include <item.h>
+#include <quest.h>
+#include <skill.h>
+#include <player.h>
+
+typedef enum {	// four-level outcome scale; default values run from CRITICAL_FAIL = 0 to CRITICAL_SUCCESS = 3
+	CRITICAL_FAIL,
+	FAIL,
+	SUCCESS,
+	CRITICAL_SUCCESS,
+}	CHECK_STATE;
+
+
+typedef enum {	// screen identifiers, numbered 0..6 in declaration order
+	SPLASH_SCREEN,
+	GAMELOOP_SCREEN,
+	CREDIT_SCREEN,
+	GAMEOVER_SCREEN,
+	SERVER_SCREEN,
+	SECRET_SCREEN,
+	DEBUG_SCREEN,
+}	ENGINE_STATE_E;	// NOTE(review): presumably the value stored in Context.state — confirm
+
+
+typedef enum {	// font identifiers, numbered 0..3 in declaration order
+	DENDRITIC_VOLTAGE,
+	LAZENBYCOMP_LIQUID,
+	LAZENBYCOMP_SMOOTH,
+	POTRA,
+}	FONTS_ENUM;	// NOTE(review): four entries, same count as Assets.fonts[4]; presumably its index — confirm
+
+typedef struct {	// player input state: key slots, one press code, mouse position and mouse delta
+	int		key[4];// NOTE(review): original note says this should equal the number of PLAYER_ACTION entries; the PLAYER_ACTION enum removed from enum.h listed 12, so 4 looks too small — confirm
+	union {	// the three members share storage, so only one press code is held at a time
+		int		key;
+		int		pad;
+		int		mouse;
+	}	press;
+	Vector2	mouse_pos;
+	Vector2	mouse_delta;
+	//int		mouse_pressed[2];
+}	Input;
+
+typedef struct {	// loaded resources; declared before Context because Context embeds it by value
+	Font fonts[4];// NOTE(review): four slots, same count as FONTS_ENUM; presumably indexed by it — confirm
+	Texture	textures;// NOTE(review): a single Texture despite the plural name — confirm
+	Model	models;// NOTE(review): a single Model despite the plural name — confirm
+	Sound	sound;
+}	Assets;
+
+
+typedef struct {	// top-level state: current state value, the player and the loaded assets
+	int state;	// NOTE(review): presumably an ENGINE_STATE_E value — confirm
+	Player player;
+	Assets assets;	// needs the complete Assets type, hence the ordering above
+}	Context;
+
+//cities
+//hideout
+//tiles
+//level
+//npc
+//clan
+//army
+//team
+
+
+//typedef struct {
+//	EVENT_TYPE		trigger;
+//	ACTIVITY_STATE	required_state;
+//	bool			(*condition)(int e, Event ev);
+//	void			*(*effect)(int e, Event ev);
+//}	virtualRule;
+
+//typedef struct {
+//	EVENT_TYPE		trigger;
+//	ACTIVITY_STATE	required_state;
+//	bool			(*condition)(int e, Event ev);
+//	void			*(*effect)(int e, Event ev);
+//}	physicalRule;
 
 # ifndef ENGINE_PROTOTYPE
 #  define ENGINE_PROTOTYPE
diff --git a/include/entity.h b/include/entity.h
new file mode 100644
index 0000000..0dd16e0
--- /dev/null
+++ b/include/entity.h
@@ -0,0 +1,93 @@
+#ifndef ENTITY_H
+# define ENTITY_H
+
+#include <core.h>
+
+typedef enum {	// activity states, numbered 0..5 in declaration order
+	ACT_IDLE,
+	ACT_WORKING,
+	ACT_FIGHTING,
+	ACT_FLEEING,
+	ACT_UNCONSCIOUS,
+	ACT_DEAD,
+}	ACTIVITY_STATE;
+
+typedef enum {	// stat identifiers, numbered from 0 in declaration order
+	STATS_STRENGHT,	// NOTE(review): likely meant "STRENGTH"; spelling kept for existing users
+	STATS_DEXTERITY,
+	STATS_WITHDOM,	// NOTE(review): likely meant "WISDOM"; spelling kept for existing users
+	STATS_INTELLIGENCE,
+	STATS_	// NOTE(review): bare prefix with value 4; looks like an unfinished entry or an end marker — confirm
+}	STATS_TYPE;
+
+typedef enum {	// body-part kinds, numbered 0..5 in declaration order
+	LIMB_ARM,
+	LIMB_LEG,
+	LIMB_HAND,
+	LIMB_FOOT,
+	LIMB_HEAD,
+	LIMB_NECK,
+}	LIMB_TYPE;
+
+typedef enum {	// limb conditions, numbered 0..7 in declaration order
+	LIMB_DAMAGED,
+	LIMB_PARALIZED,	// NOTE(review): likely meant "PARALYZED"; spelling kept for existing users
+	LIMB_CUT,
+	LIMB_MISSING,
+	LIMB_SEVERE,
+	LIMB_INTACT,	// NOTE(review): how this differs from LIMB_OK is not visible here — confirm
+	LIMB_OK,
+	LIMB_INVINCIBLE,
+}	LIMB_STATE;
+
+typedef enum {	// sequential values: ENTITY_DEAD = 0, ENTITY_ALIVE = 1, ENTITY_UNCONCIOUS = 2
+	ENTITY_DEAD,
+	ENTITY_ALIVE,
+	ENTITY_UNCONCIOUS,	// NOTE(review): likely meant "UNCONSCIOUS"; spelling kept for existing users
+}	ENTITY_STATE;
+
+typedef struct {	// physical attributes and senses, all plain ints (units and ranges are not defined here)
+	int strenght;	// NOTE(review): likely meant "strength"; spelling kept for existing users
+	int agility;
+	int toughness;
+	int	proprioception;
+	int	earing;	// NOTE(review): likely meant "hearing"; spelling kept for existing users
+	int	touch;
+	int	eyesight;
+}	BodyStats;
+
+typedef struct {	// mental attributes, all plain ints (units and ranges are not defined here)
+	int	intellect;
+	int fortitude;
+	int charisma;
+	int eloquence;
+	int perception;
+}	MentalStats;
+
+typedef struct {	// one body part: which kind it is and its current condition
+	LIMB_TYPE	type;
+	LIMB_STATE	state;
+}	Limb;
+
+typedef struct {	// body data: limbs, both stat groups, mass, position, velocity and health
+	Limb		limbs[10];	// fixed capacity of 10; NOTE(review): how unused slots are marked is not visible here — confirm
+	BodyStats	body_stats;
+	MentalStats	mental_stats;
+	int			mass;
+	Vector3		pos;
+	Vector3		velocity;
+	int			health;
+}	Body;
+
+typedef struct {	// a Body plus a faction id and a state value
+	Body			body;
+	int				faction;
+	ENTITY_STATE	state;//can be multiple flag. NOTE(review): ENTITY_STATE enumerators are sequential (0, 1, 2), not distinct bits, so they cannot be OR-ed together as declared — confirm intent
+}	Entity;
+
+typedef struct {	// an Entity paired with an AI callback
+	Entity entity;
+	void	(*ai)(void);	// takes no arguments and returns nothing, so the callback is not handed the Mob it belongs to
+}	Mob;
+
+#endif
diff --git a/include/enum.h b/include/enum.h
deleted file mode 100644
index 80f6c18..0000000
--- a/include/enum.h
+++ /dev/null
@@ -1,129 +0,0 @@
-
-# ifndef ENGINE_ENUM
-#  define ENGINE_ENUM
-
-typedef enum {
-	EVT_HUNGER_TICK,
-	EVT_DAMAGE_TAKEN,
-	EVT_ITEM_OBSERVED,
-	EVT_ORDER_RECEIVED,
-	EVT_MEMORY_TRIGGERED,
-	EVT_PLAYER_INPUT,
-}	EVENT_TYPE;
-
-typedef enum {
-	ACT_IDLE,
-	ACT_WORKING,
-	ACT_FIGHTING,
-	ACT_FLEEING,
-	ACT_UNCONSCIOUS,
-	ACT_DEAD,
-}	ACTIVITY_STATE;
-
-typedef enum {
-	PHYSICAL_DMG,
-	MAGICA_DMG,
-}	DAMAGE_TYPE;
-
-typedef enum {
-	STATS_STRENGHT,
-	STATS_DEXTERITY,
-	STATS_WITHDOM,
-	STATS_INTELLIGENCE,
-	STATS_
-}	STATS_TYPE;
-
-typedef enum {
-	LIMB_ARM,
-	LIMB_LEG,
-	LIMB_HAND,
-	LIMB_FOOT,
-	LIMB_HEAD,
-	LIMB_NECK,
-}	LIMB_TYPE;
-
-typedef enum {
-	LIMB_DAMAGED,
-	LIMB_PARALIZED,
-	LIMB_CUT,
-	LIMB_MISSING,
-	LIMB_SEVERE,
-	LIMB_INTACT,
-	LIMB_OK,
-	LIMB_INVINCIBLE,
-}	LIMB_STATE;
-
-typedef enum {
-	SKILL_NONE,
-	SKILL_PASSIF,
-	SKILL_MASTERY,
-	SKILL_ACTIF,
-	SKILL_LABOR,
-	SKILL_COMBAT,
-	SKILL_KNOWLEDGE,
-	SKILL_PROGRESSION,
-	SKILL_INATE,
-}	SKILL_TYPE;
-
-typedef enum {
-	ENTITY_DEAD,
-	ENTITY_ALIVE,
-	ENTITY_UNCONCIOUS,
-}	ENTITY_STATE;
-
-typedef enum {
-	CRITICAL_FAIL,
-	FAIL,
-	SUCCESS,
-	CRITICAL_SUCCESS,
-}	CHECK_STATE;
-
-typedef enum {
-	ITEM_EQUIPABLE,
-	ITEM_CONSUMABLE,
-	ITEM_PLACEABLE,
-	ITEM_QUEST,
-}	ITEM_FLAGS;
-
-typedef enum {
-	SPLASH_SCREEN,
-	GAMELOOP_SCREEN,
-	CREDIT_SCREEN,
-	GAMEOVER_SCREEN,
-	SERVER_SCREEN,
-	SECRET_SCREEN,
-	DEBUG_SCREEN,
-}	ENGINE_STATE_E;
-
-typedef enum {
-	QUEST_SUCCESS,
-	QUEST_FAILURE,
-	QUEST_TAKEN,
-	QUEST_KNOW,
-	QUEST_UNKNOW,
-	QUEST_COMPLETE,
-}	QUEST_STATE;
-
-typedef enum {
-	ACTION_FORWARD,
-	ACTION_BACKWARD,
-	ACTION_LEFT,
-	ACTION_RIGHT,
-	ACTION_PRIMARY,
-	ACTION_SECONDARY,
-	ACTION_USE,
-	ACTION_GRAB,
-	ACTION_JUMP,
-	ACTION_DASH,
-	ACTION_SKILL1,
-	ACTION_TOOLBAR1,
-}	PLAYER_ACTION;
-
-typedef enum {
-	DENDRITIC_VOLTAGE,
-	LAZENBYCOMP_LIQUID,
-	LAZENBYCOMP_SMOOTH,
-	POTRA,
-}	FONTS_ENUM;
-
-# endif
diff --git a/include/event.h b/include/event.h
new file mode 100644
index 0000000..10785e6
--- /dev/null
+++ b/include/event.h
@@ -0,0 +1,25 @@
+#ifndef EVENT_H
+# define EVENT_H
+
+#include <core.h>
+
+typedef enum {	// event kinds, numbered 0..5 in declaration order
+	EVT_HUNGER_TICK,
+	EVT_DAMAGE_TAKEN,
+	EVT_ITEM_OBSERVED,
+	EVT_ORDER_RECEIVED,
+	EVT_MEMORY_TRIGGERED,
+	EVT_PLAYER_INPUT,
+}	EVENT_TYPE;
+
+typedef struct {	// one event record: its kind plus integer, time and text payload fields
+	EVENT_TYPE	type;
+	int			value;
+	double		time;	// NOTE(review): unit and time base are not visible here — confirm
+	char*		info;	// NOTE(review): ownership and lifetime of this string are not visible here — confirm
+	int			initiator;
+	int			actors;
+	int			actions;
+}	Event;
+
+#endif
diff --git a/include/extern/external/aes.c b/include/extern/external/aes.c
new file mode 100644
index 0000000..4481f7b
--- /dev/null
+++ b/include/extern/external/aes.c
@@ -0,0 +1,572 @@
+/*
+
+This is an implementation of the AES algorithm, specifically ECB, CTR and CBC mode.
+Key size can be chosen in aes.h - available choices are AES128, AES192, AES256; the block size is always 128 bits.
+
+The implementation is verified against the test vectors in:
+  National Institute of Standards and Technology Special Publication 800-38A 2001 ED
+
+ECB-AES128
+----------
+
+  plain-text:
+    6bc1bee22e409f96e93d7e117393172a
+    ae2d8a571e03ac9c9eb76fac45af8e51
+    30c81c46a35ce411e5fbc1191a0a52ef
+    f69f2445df4f9b17ad2b417be66c3710
+
+  key:
+    2b7e151628aed2a6abf7158809cf4f3c
+
+  resulting cipher
+    3ad77bb40d7a3660a89ecaf32466ef97 
+    f5d3d58503b9699de785895a96fdbaaf 
+    43b1cd7f598ece23881b00e3ed030688 
+    7b0c785e27e8ad3f8223207104725dd4 
+
+
+NOTE:   String length must be evenly divisible by 16byte (str_len % 16 == 0)
+        You should pad the end of the string with zeros if this is not the case.
+        For AES192/256 the key size is proportionally larger.
+
+*/
+
+
+/*****************************************************************************/
+/* Includes:                                                                 */
+/*****************************************************************************/
+#include <string.h> // CBC mode, for memset
+#include "aes.h"
+
+/*****************************************************************************/
+/* Defines:                                                                  */
+/*****************************************************************************/
+// Nb: the number of 32-bit columns in an AES state. This is a constant in AES. Value=4
+#define Nb 4
+
+#if defined(AES256) && (AES256 == 1) // AES-256
+    #define Nk 8        // 32-bit words in the key (8 * 4 = 32 bytes)
+    #define Nr 14       // rounds
+#elif defined(AES192) && (AES192 == 1) // AES-192
+    #define Nk 6        // 32-bit words in the key (6 * 4 = 24 bytes)
+    #define Nr 12       // rounds
+#else // AES-128 (default when neither macro above is set to 1)
+    #define Nk 4        // The number of 32 bit words in a key.
+    #define Nr 10       // The number of rounds in AES Cipher.
+#endif
+
+// MULTIPLY_AS_A_FUNCTION selects whether Multiply() is built as a function (1) or as a macro (0, the default).
+// jcallan@github points out that the function form reduces code size considerably with the Keil ARM compiler.
+// See this link for more information: https://github.com/kokke/tiny-AES-C/pull/3
+#ifndef MULTIPLY_AS_A_FUNCTION
+  #define MULTIPLY_AS_A_FUNCTION 0
+#endif
+
+
+
+
+/*****************************************************************************/
+/* Private variables:                                                        */
+/*****************************************************************************/
+// state - 4x4 byte array holding the intermediate results during encryption and decryption.
+typedef uint8_t state_t[4][4];
+
+
+
+// Forward substitution table, read through getSBoxValue() by KeyExpansion and SubBytes.
+// The lookup tables are marked const so they can be placed in read-only storage instead of RAM.
+// The values could instead be computed at run time, which can help in (embedded) bootloaders where ROM is limited.
+static const uint8_t sbox[256] = {
+  //0     1    2      3     4    5     6     7      8    9     A      B    C     D     E     F
+  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
+
+#if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1) // only the CBC and ECB decrypt paths read this table
+static const uint8_t rsbox[256] = { // inverse substitution table, read through getSBoxInvert() by InvSubBytes
+  0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+  0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+  0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+  0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+  0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+  0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+  0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+  0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+  0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+  0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+  0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+  0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+  0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+  0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+  0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+  0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };
+#endif
+
+// The round constant word array: Rcon[i] is x to the power (i-1) in the field GF(2^8), with x denoted {02}.
+// KeyExpansion reads it as Rcon[i/Nk] with i >= Nk, so index 0 is never read.
+static const uint8_t Rcon[11] = {
+  0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
+
+/*
+ * Jordan Goulder points out in PR #12 (https://github.com/kokke/tiny-AES-C/pull/12),
+ * that you can remove most of the elements in the Rcon array, because they are unused.
+ *
+ * From Wikipedia's article on the Rijndael key schedule @ https://en.wikipedia.org/wiki/Rijndael_key_schedule#Rcon
+ * 
+ * "Only the first some of these constants are actually used – up to rcon[10] for AES-128 (as 11 round keys are needed), 
+ *  up to rcon[8] for AES-192, up to rcon[7] for AES-256. rcon[0] is not used in AES algorithm."
+ */
+
+
+/*****************************************************************************/
+/* Private functions:                                                        */
+/*****************************************************************************/
+/*
+static uint8_t getSBoxValue(uint8_t num)
+{
+  return sbox[num];
+}
+*/
+#define getSBoxValue(num) (sbox[(num)]) // macro replacement for the commented-out function above: forward S-box lookup
+
+// Expands Key (Nk 32-bit words) into RoundKey, which receives Nb*(Nr+1) 32-bit words; Cipher and InvCipher both read them.
+static void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key)
+{
+  unsigned i, j, k;
+  uint8_t tempa[4]; // Used for the column/row operations
+  
+  // The first round key is the key itself.
+  for (i = 0; i < Nk; ++i)
+  {
+    RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
+    RoundKey[(i * 4) + 1] = Key[(i * 4) + 1];
+    RoundKey[(i * 4) + 2] = Key[(i * 4) + 2];
+    RoundKey[(i * 4) + 3] = Key[(i * 4) + 3];
+  }
+
+  // All other round keys are found from the previous round keys.
+  for (i = Nk; i < Nb * (Nr + 1); ++i)
+  {
+    {
+      k = (i - 1) * 4; // byte offset of the previous word
+      tempa[0]=RoundKey[k + 0];
+      tempa[1]=RoundKey[k + 1];
+      tempa[2]=RoundKey[k + 2];
+      tempa[3]=RoundKey[k + 3];
+
+    }
+
+    if (i % Nk == 0) // first word of each Nk-word group: RotWord, SubWord, then XOR with Rcon
+    {
+      // RotWord shifts the 4 bytes in a word to the left once.
+      // [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
+
+      // Function RotWord()
+      {
+        const uint8_t u8tmp = tempa[0];
+        tempa[0] = tempa[1];
+        tempa[1] = tempa[2];
+        tempa[2] = tempa[3];
+        tempa[3] = u8tmp;
+      }
+
+      // SubWord() takes a four-byte input word and applies the S-box
+      // to each of the four bytes to produce an output word.
+
+      // Function Subword()
+      {
+        tempa[0] = getSBoxValue(tempa[0]);
+        tempa[1] = getSBoxValue(tempa[1]);
+        tempa[2] = getSBoxValue(tempa[2]);
+        tempa[3] = getSBoxValue(tempa[3]);
+      }
+
+      tempa[0] = tempa[0] ^ Rcon[i/Nk]; // only the first byte receives the round constant
+    }
+#if defined(AES256) && (AES256 == 1)
+    if (i % Nk == 4) // AES-256 only: extra SubWord halfway through each 8-word group
+    {
+      // Function Subword()
+      {
+        tempa[0] = getSBoxValue(tempa[0]);
+        tempa[1] = getSBoxValue(tempa[1]);
+        tempa[2] = getSBoxValue(tempa[2]);
+        tempa[3] = getSBoxValue(tempa[3]);
+      }
+    }
+#endif
+    j = i * 4; k=(i - Nk) * 4; // j: byte offset of the word being written, k: the word Nk positions earlier
+    RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0];
+    RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1];
+    RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2];
+    RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3];
+  }
+}
+
+void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key) // fills ctx->RoundKey from key (Nk*4 bytes are read); ctx->Iv is not touched
+{
+  KeyExpansion(ctx->RoundKey, key);
+}
+#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
+void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv) // expands key into ctx->RoundKey and stores iv
+{
+  KeyExpansion(ctx->RoundKey, key);
+  memcpy (ctx->Iv, iv, AES_BLOCKLEN); // exactly AES_BLOCKLEN bytes are read from iv
+}
+void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv) // replaces ctx->Iv; the round keys are left as they are
+{
+  memcpy (ctx->Iv, iv, AES_BLOCKLEN); // exactly AES_BLOCKLEN bytes are read from iv
+}
+#endif
+
+// XORs round key number `round` into the state, byte by byte.
+// Each round key occupies Nb * 4 consecutive bytes of RoundKey, starting at round * Nb * 4.
+static void AddRoundKey(uint8_t round, state_t* state, const uint8_t* RoundKey)
+{
+  uint8_t i,j;
+  for (i = 0; i < 4; ++i)
+  {
+    for (j = 0; j < 4; ++j)
+    {
+      (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j];
+    }
+  }
+}
+
+// SubBytes replaces every byte of the state matrix, in place,
+// with its forward S-box value (getSBoxValue).
+static void SubBytes(state_t* state)
+{
+  uint8_t i, j;
+  for (i = 0; i < 4; ++i)
+  {
+    for (j = 0; j < 4; ++j)
+    {
+      (*state)[j][i] = getSBoxValue((*state)[j][i]);
+    }
+  }
+}
+
+// The ShiftRows() function rotates the rows of the state to the left.
+// The state is indexed as (*state)[column][row]; each row is rotated by a different offset.
+// Offset = Row number. So row 0 is not shifted.
+static void ShiftRows(state_t* state)
+{
+  uint8_t temp;
+
+  // Rotate row 1 one column to the left
+  temp           = (*state)[0][1];
+  (*state)[0][1] = (*state)[1][1];
+  (*state)[1][1] = (*state)[2][1];
+  (*state)[2][1] = (*state)[3][1];
+  (*state)[3][1] = temp;
+
+  // Rotate row 2 two columns to the left (done as two swaps)
+  temp           = (*state)[0][2];
+  (*state)[0][2] = (*state)[2][2];
+  (*state)[2][2] = temp;
+
+  temp           = (*state)[1][2];
+  (*state)[1][2] = (*state)[3][2];
+  (*state)[3][2] = temp;
+
+  // Rotate row 3 three columns to the left
+  temp           = (*state)[0][3];
+  (*state)[0][3] = (*state)[3][3];
+  (*state)[3][3] = (*state)[2][3];
+  (*state)[2][3] = (*state)[1][3];
+  (*state)[1][3] = temp;
+}
+
+static uint8_t xtime(uint8_t x) // returns x shifted left one bit, XOR-ed with 0x1b when the top bit of x was set; result truncated to 8 bits
+{
+  return ((x<<1) ^ (((x>>7) & 1) * 0x1b));
+}
+
+// MixColumns mixes the four bytes of each column (*state)[i] in place, using xtime() and XOR only
+static void MixColumns(state_t* state)
+{
+  uint8_t i;
+  uint8_t Tmp, Tm, t;
+  for (i = 0; i < 4; ++i)
+  {  
+    t   = (*state)[i][0]; // saved because byte 0 is overwritten before the last step needs it
+    Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3] ;
+    Tm  = (*state)[i][0] ^ (*state)[i][1] ; Tm = xtime(Tm);  (*state)[i][0] ^= Tm ^ Tmp ;
+    Tm  = (*state)[i][1] ^ (*state)[i][2] ; Tm = xtime(Tm);  (*state)[i][1] ^= Tm ^ Tmp ;
+    Tm  = (*state)[i][2] ^ (*state)[i][3] ; Tm = xtime(Tm);  (*state)[i][2] ^= Tm ^ Tmp ;
+    Tm  = (*state)[i][3] ^ t ;              Tm = xtime(Tm);  (*state)[i][3] ^= Tm ^ Tmp ;
+  }
+}
+
+// Multiply(x, y) multiplies x by y in the field GF(2^8); only bits 0..4 of y are examined.
+// Note: the y>>4 term is unneeded for the multipliers used in this file (all below 0x10), but often
+//       yields a smaller binary because the compiler seems to vectorize the operation better this way.
+//       See https://github.com/kokke/tiny-AES-c/pull/34
+#if MULTIPLY_AS_A_FUNCTION
+static uint8_t Multiply(uint8_t x, uint8_t y)
+{
+  return (((y & 1) * x) ^
+       ((y>>1 & 1) * xtime(x)) ^
+       ((y>>2 & 1) * xtime(xtime(x))) ^
+       ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^
+       ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */
+}
+#else // macro form: every use of an argument is parenthesized so expression arguments expand correctly
+#define Multiply(x, y)                                    \
+      (  (((y) & 1) * (x)) ^                              \
+      ((((y) >> 1) & 1) * xtime(x)) ^                     \
+      ((((y) >> 2) & 1) * xtime(xtime(x))) ^              \
+      ((((y) >> 3) & 1) * xtime(xtime(xtime(x)))) ^       \
+      ((((y) >> 4) & 1) * xtime(xtime(xtime(xtime(x))))))
+
+#endif
+
+#if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+/*
+static uint8_t getSBoxInvert(uint8_t num)
+{
+  return rsbox[num];
+}
+*/
+#define getSBoxInvert(num) (rsbox[(num)]) // macro replacement for the commented-out function above: inverse S-box lookup
+
+// InvMixColumns mixes each column (*state)[i] in place, the decrypt-side counterpart of MixColumns.
+// Every output byte is the XOR of the four input bytes multiplied by 0x0e, 0x0b, 0x0d and 0x09 in rotating order.
+// Please use the references to gain more information on the GF(2^8) multiplication.
+static void InvMixColumns(state_t* state)
+{
+  int i;
+  uint8_t a, b, c, d;
+  for (i = 0; i < 4; ++i)
+  { 
+    a = (*state)[i][0]; // copy the column first: all four outputs depend on the original values
+    b = (*state)[i][1];
+    c = (*state)[i][2];
+    d = (*state)[i][3];
+
+    (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
+    (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
+    (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
+    (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
+  }
+}
+
+
+// InvSubBytes replaces every byte of the state matrix, in place,
+// with its inverse S-box value (getSBoxInvert).
+static void InvSubBytes(state_t* state)
+{
+  uint8_t i, j;
+  for (i = 0; i < 4; ++i)
+  {
+    for (j = 0; j < 4; ++j)
+    {
+      (*state)[j][i] = getSBoxInvert((*state)[j][i]);
+    }
+  }
+}
+
+static void InvShiftRows(state_t* state) // rotates rows 1..3 of the state to the right; state is indexed (*state)[column][row]
+{
+  uint8_t temp;
+
+  // Rotate row 1 one column to the right
+  temp = (*state)[3][1];
+  (*state)[3][1] = (*state)[2][1];
+  (*state)[2][1] = (*state)[1][1];
+  (*state)[1][1] = (*state)[0][1];
+  (*state)[0][1] = temp;
+
+  // Rotate row 2 two columns to the right (done as two swaps)
+  temp = (*state)[0][2];
+  (*state)[0][2] = (*state)[2][2];
+  (*state)[2][2] = temp;
+
+  temp = (*state)[1][2];
+  (*state)[1][2] = (*state)[3][2];
+  (*state)[3][2] = temp;
+
+  // Rotate row 3 three columns to the right
+  temp = (*state)[0][3];
+  (*state)[0][3] = (*state)[1][3];
+  (*state)[1][3] = (*state)[2][3];
+  (*state)[2][3] = (*state)[3][3];
+  (*state)[3][3] = temp;
+}
+#endif // #if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+
+// Cipher encrypts one 16-byte state in place using the expanded RoundKey.
+static void Cipher(state_t* state, const uint8_t* RoundKey)
+{
+  uint8_t round = 0;
+
+  // Add round key 0 to the state before starting the rounds.
+  AddRoundKey(0, state, RoundKey);
+
+  // There will be Nr rounds.
+  // The first Nr-1 rounds are identical.
+  // All Nr rounds run in the loop below; the loop exits
+  // in round Nr before MixColumns(), so the last round skips it.
+  for (round = 1; ; ++round)
+  {
+    SubBytes(state);
+    ShiftRows(state);
+    if (round == Nr) {
+      break;
+    }
+    MixColumns(state);
+    AddRoundKey(round, state, RoundKey);
+  }
+  // Add round key Nr to finish the last round
+  AddRoundKey(Nr, state, RoundKey);
+}
+
+#if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+static void InvCipher(state_t* state, const uint8_t* RoundKey) // decrypts one 16-byte state in place using the expanded RoundKey
+{
+  uint8_t round = 0;
+
+  // Add the last round key (number Nr) to the state before starting the rounds.
+  AddRoundKey(Nr, state, RoundKey);
+
+  // There will be Nr rounds, counting round keys down from Nr-1 to 0.
+  // The first Nr-1 rounds are identical.
+  // All Nr rounds run in the loop below; the loop exits
+  // at round key 0 before InvMixColumns(), so the last round skips it.
+  for (round = (Nr - 1); ; --round)
+  {
+    InvShiftRows(state);
+    InvSubBytes(state);
+    AddRoundKey(round, state, RoundKey);
+    if (round == 0) {
+      break;
+    }
+    InvMixColumns(state);
+  }
+
+}
+#endif // #if (defined(CBC) && CBC == 1) || (defined(ECB) && ECB == 1)
+
+/*****************************************************************************/
+/* Public functions:                                                         */
+/*****************************************************************************/
+#if defined(ECB) && (ECB == 1)
+
+
+void AES_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf) // buf is read and overwritten as one 16-byte state
+{
+  // Encrypts the block in buf in place with the round keys held in ctx.
+  Cipher((state_t*)buf, ctx->RoundKey);
+}
+
+void AES_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf) // buf is read and overwritten as one 16-byte state
+{
+  // Decrypts the cipher-text block in buf in place with the round keys held in ctx.
+  InvCipher((state_t*)buf, ctx->RoundKey);
+}
+
+
+#endif // #if defined(ECB) && (ECB == 1)
+
+
+
+
+
+#if defined(CBC) && (CBC == 1)
+
+
+static void XorWithIv(uint8_t* buf, const uint8_t* Iv) // XORs AES_BLOCKLEN bytes of Iv into buf, in place
+{
+  uint8_t i;
+  for (i = 0; i < AES_BLOCKLEN; ++i) // The block in AES is always 128bit no matter the key size
+  {
+    buf[i] ^= Iv[i];
+  }
+}
+
+void AES_CBC_encrypt_buffer(struct AES_ctx *ctx, uint8_t* buf, size_t length) // encrypts buf in place, chaining from ctx->Iv; whole AES_BLOCKLEN blocks are processed, so length must be a multiple of AES_BLOCKLEN
+{
+  size_t i;
+  uint8_t *Iv = ctx->Iv;
+  for (i = 0; i < length; i += AES_BLOCKLEN)
+  {
+    XorWithIv(buf, Iv);                    // mix the previous cipher-text block (or the stored IV) into this block
+    Cipher((state_t*)buf, ctx->RoundKey);
+    Iv = buf;                              // the block just produced chains into the next one
+    buf += AES_BLOCKLEN;
+  }
+  /* store Iv in ctx for next call; memmove because Iv still aliases ctx->Iv when length is 0 */
+  memmove(ctx->Iv, Iv, AES_BLOCKLEN);
+}
+
+void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length) // decrypts buf in place; whole AES_BLOCKLEN blocks are processed, so length must be a multiple of AES_BLOCKLEN
+{
+  size_t i;
+  uint8_t storeNextIv[AES_BLOCKLEN];
+  for (i = 0; i < length; i += AES_BLOCKLEN)
+  {
+    memcpy(storeNextIv, buf, AES_BLOCKLEN); // keep the cipher-text block: it is overwritten below but is the next block's IV
+    InvCipher((state_t*)buf, ctx->RoundKey);
+    XorWithIv(buf, ctx->Iv);
+    memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN); // ctx->Iv ends up holding the last cipher-text block, ready for a following call
+    buf += AES_BLOCKLEN;
+  }
+
+}
+
+#endif // #if defined(CBC) && (CBC == 1)
+
+
+
+#if defined(CTR) && (CTR == 1)
+
+/* Symmetrical operation: same function for encrypting as for decrypting. Note any IV/nonce should never be reused with the same key. Every call starts with a fresh keystream block, so a message handled over several calls must be split at multiples of AES_BLOCKLEN */
+void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length)
+{
+  uint8_t buffer[AES_BLOCKLEN];
+  
+  size_t i;
+  int bi;
+  for (i = 0, bi = AES_BLOCKLEN; i < length; ++i, ++bi)
+  {
+    if (bi == AES_BLOCKLEN) /* keystream block used up (or first byte): regenerate the XOR complement in buffer */
+    {
+      
+      memcpy(buffer, ctx->Iv, AES_BLOCKLEN);
+      Cipher((state_t*)buffer,ctx->RoundKey);
+
+      /* Increment Iv as a 16-byte counter, starting at the last byte, and carry on overflow */
+      for (bi = (AES_BLOCKLEN - 1); bi >= 0; --bi)
+      {
+	/* this byte would overflow: wrap it to 0 and carry into the previous byte */
+        if (ctx->Iv[bi] == 255)
+	{
+          ctx->Iv[bi] = 0;
+          continue;
+        } 
+        ctx->Iv[bi] += 1;
+        break;   
+      }
+      bi = 0; /* bi was reused as the carry index; restart at the first keystream byte */
+    }
+
+    buf[i] = (buf[i] ^ buffer[bi]);
+  }
+}
+
+#endif // #if defined(CTR) && (CTR == 1)
+
diff --git a/include/extern/external/aes.h b/include/extern/external/aes.h
new file mode 100644
index 0000000..702858a
--- /dev/null
+++ b/include/extern/external/aes.h
@@ -0,0 +1,91 @@
+#ifndef _AES_H_
+#define _AES_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+// #define the macros below to 1/0 to enable/disable the mode of operation.
+//
+// CBC enables AES encryption in CBC-mode of operation.
+// CTR enables encryption in counter-mode.
+// ECB enables the basic ECB 16-byte block algorithm. All can be enabled simultaneously.
+
+// The #ifndef-guard allows it to be configured before #include'ing or at compile time.
+#ifndef CBC // all three modes default to enabled unless the macro is already defined
+  #define CBC 1
+#endif
+
+#ifndef ECB
+  #define ECB 1
+#endif
+
+#ifndef CTR
+  #define CTR 1
+#endif
+
+
+//#define AES128 1
+//#define AES192 1
+#define AES256 1 // defined unconditionally, so the AES-256 branch below is the one selected as shipped
+
+#define AES_BLOCKLEN 16 // Block length in bytes - AES is 128b block only
+
+#if defined(AES256) && (AES256 == 1)
+    #define AES_KEYLEN 32 // Key length in bytes
+    #define AES_keyExpSize 240 // size of the RoundKey array in struct AES_ctx, in bytes
+#elif defined(AES192) && (AES192 == 1)
+    #define AES_KEYLEN 24 // Key length in bytes
+    #define AES_keyExpSize 208 // size of the RoundKey array in struct AES_ctx, in bytes
+#else
+    #define AES_KEYLEN 16   // Key length in bytes
+    #define AES_keyExpSize 176 // size of the RoundKey array in struct AES_ctx, in bytes
+#endif
+
+struct AES_ctx // per-key state: the expanded round keys, plus the IV when CBC or CTR is enabled
+{
+  uint8_t RoundKey[AES_keyExpSize];
+#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
+  uint8_t Iv[AES_BLOCKLEN];
+#endif
+};
+
+void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key); // key: AES_KEYLEN bytes; declared for every mode
+#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
+void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv); // iv: AES_BLOCKLEN bytes
+void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv); // iv: AES_BLOCKLEN bytes
+#endif
+
+#if defined(ECB) && (ECB == 1)
+// buffer size is exactly AES_BLOCKLEN bytes; it is transformed in place.
+// you need only AES_init_ctx as IV is not used in ECB
+// NB: ECB is considered insecure for most uses
+void AES_ECB_encrypt(const struct AES_ctx* ctx, uint8_t* buf);
+void AES_ECB_decrypt(const struct AES_ctx* ctx, uint8_t* buf);
+
+#endif // #if defined(ECB) && (ECB == 1)
+
+
+#if defined(CBC) && (CBC == 1)
+// buffer size MUST be a multiple of AES_BLOCKLEN; the buffer is transformed in place.
+// Suggest https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
+// NOTES: you need to set IV in ctx via AES_init_ctx_iv() or AES_ctx_set_iv()
+//        no IV should ever be reused with the same key
+void AES_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length);
+void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length);
+
+#endif // #if defined(CBC) && (CBC == 1)
+
+
+#if defined(CTR) && (CTR == 1)
+
+// Same function for encrypting as for decrypting; the buffer is transformed in place.
+// IV is incremented for every block, and its encryption is used as the XOR-complement for the output
+// Suggesting https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
+// NOTES: you need to set IV in ctx with AES_init_ctx_iv() or AES_ctx_set_iv()
+//        no IV should ever be reused with the same key
+void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, size_t length);
+
+#endif // #if defined(CTR) && (CTR == 1)
+
+
+#endif // _AES_H_
diff --git a/include/extern/external/lz4.c b/include/extern/external/lz4.c
new file mode 100644
index 0000000..a2272cf
--- /dev/null
+++ b/include/extern/external/lz4.c
@@ -0,0 +1,2526 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2020, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+/*-************************************
+*  Tuning parameters
+**************************************/
+/*
+ * LZ4_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4_HEAPMODE
+#  define LZ4_HEAPMODE 0
+#endif
+
+/*
+ * LZ4_ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * gets treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX 65537
+
+
+/*-************************************
+*  CPU Feature Detection
+**************************************/
+/* LZ4_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets whose assembly generation depends on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
+#  if defined(__GNUC__) && \
+  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define LZ4_FORCE_MEMORY_ACCESS 2
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
+#    define LZ4_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
+#  undef  LZ4_FORCE_SW_BITCOUNT  /* avoid double def */
+#  define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+
+/*-************************************
+*  Dependency
+**************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+#  define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
+#define LZ4_STATIC_LINKING_ONLY
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY  /* LZ4_DISTANCE_MAX */
+#include "lz4.h"
+/* see also "memory routines" below */
+
+
+/*-************************************
+*  Compiler Options
+**************************************/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)  /* Visual Studio 2005+ */
+#  include <intrin.h>               /* only present in VS2005+ */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+#endif  /* _MSC_VER */
+
+#ifndef LZ4_FORCE_INLINE
+#  ifdef _MSC_VER    /* Visual Studio */
+#    define LZ4_FORCE_INLINE static __forceinline
+#  else
+#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#      ifdef __GNUC__
+#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
+#      else
+#        define LZ4_FORCE_INLINE static inline
+#      endif
+#    else
+#      define LZ4_FORCE_INLINE static
+#    endif /* __STDC_VERSION__ */
+#  endif  /* _MSC_VER */
+#endif /* LZ4_FORCE_INLINE */
+
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
+#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
+#  undef LZ4_FORCE_INLINE
+#  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
+#else
+#  define LZ4_FORCE_O2
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
+#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
+#else
+#  define expect(expr,value)    (expr)
+#endif
+
+#ifndef likely
+#define likely(expr)     expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
+#define unlikely(expr)   expect((expr) != 0, 0)
+#endif
+
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST  /* can be externally provided */
+# define LZ4_ALIGN_TEST 1
+#endif
+
+
+/*-************************************
+*  Memory routines
+**************************************/
+#ifdef LZ4_USER_MEMORY_FUNCTIONS
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void* LZ4_malloc(size_t s);
+void* LZ4_calloc(size_t n, size_t s);
+void  LZ4_free(void* p);
+# define ALLOC(s)          LZ4_malloc(s)
+# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+# define FREEMEM(p)        LZ4_free(p)
+#else
+# include <stdlib.h>   /* malloc, calloc, free */
+# define ALLOC(s)          malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,s)
+# define FREEMEM(p)        free(p)
+#endif
+
+#include <string.h>   /* memset, memcpy */
+#define MEM_INIT(p,v,s)   memset((p),(v),(s))
+
+
+/*-************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
+#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+*  Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+   static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                          \
+        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+            fprintf(stderr, __FILE__ ": ");           \
+            fprintf(stderr, __VA_ARGS__);             \
+            fprintf(stderr, " \n");                   \
+    }   }
+#else
+#  define DEBUGLOG(l, ...) {}    /* disabled */
+#endif
+
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{   /* the mask test below matches "address is a multiple of alignment" only when alignment is a power of 2 */
+    return ((size_t)ptr & (alignment -1)) == 0;   /* 1 when none of the address bits selected by (alignment-1) is set, else 0 */
+}
+
+
+/*-************************************
+*  Types
+**************************************/
+#include <limits.h>
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef uintptr_t uptrval;
+#else
+# if UINT_MAX != 4294967295UL
+#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
+#endif
+
+#if defined(__x86_64__)
+  typedef U64    reg_t;   /* 64-bits in x32 mode */
+#else
+  typedef size_t reg_t;   /* 32-bits in x32 mode */
+#endif
+
+typedef enum {
+    notLimited = 0,
+    limitedOutput = 1,
+    fillOutput = 2
+} limitedOutput_directive;
+
+
+/*-************************************
+*  Reading and writing into memory
+**************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#else
+#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+#endif
+
+static unsigned LZ4_isLittleEndian(void)
+{   /* run-time byte-order probe : stores the U32 value 1, then inspects its first byte in memory */
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];   /* 1 when the least-significant byte is stored first (little-endian), 0 otherwise */
+}
+
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* Method 2 : dereference a cast pointer directly, i.e. lie to the compiler about data alignment; use with caution */
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
+
+static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+
+/* Method 1 : access through a packed union. __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign;
+
+static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; }
+static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; }
+static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; }
+
+static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; }
+static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; }
+
+#else  /* Method 0 : safe and portable access using memcpy() */
+
+static U16 LZ4_read16(const void* memPtr)
+{
+    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;   /* copies sizeof(U16) bytes in host byte order */
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+    U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;   /* copies sizeof(U32) bytes in host byte order */
+}
+
+static reg_t LZ4_read_ARCH(const void* memPtr)
+{
+    reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;   /* copies sizeof(reg_t) bytes in host byte order */
+}
+
+static void LZ4_write16(void* memPtr, U16 value)
+{
+    LZ4_memcpy(memPtr, &value, sizeof(value));   /* stores sizeof(U16) bytes in host byte order */
+}
+
+static void LZ4_write32(void* memPtr, U32 value)
+{
+    LZ4_memcpy(memPtr, &value, sizeof(value));   /* stores sizeof(U32) bytes in host byte order */
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+
+static U16 LZ4_readLE16(const void* memPtr)
+{   /* reads a 16-bit value stored least-significant byte first, whatever the host byte order */
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read16(memPtr);   /* host order already matches the stored order */
+    } else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));   /* p[0] is the low byte, p[1] the high byte */
+    }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{   /* stores value as 2 bytes, least-significant byte first, whatever the host byte order */
+    if (LZ4_isLittleEndian()) {
+        LZ4_write16(memPtr, value);   /* host order already produces the wanted layout */
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE) value;         /* low byte first */
+        p[1] = (BYTE)(value>>8);     /* then high byte */
+    }
+}
+
+/* customized variant of memcpy : copies in 8-byte steps until dstEnd is reached or passed, so it can overwrite up to 8 bytes beyond dstEnd */
+LZ4_FORCE_INLINE
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);   /* do/while : the first 8 bytes are copied even when dstPtr >= dstEnd */
+}
+
+static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
+static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
+
+
+#ifndef LZ4_FAST_DEC_LOOP
+#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
+#    define LZ4_FAST_DEC_LOOP 1
+#  elif defined(__aarch64__) && defined(__APPLE__)
+#    define LZ4_FAST_DEC_LOOP 1
+#  elif defined(__aarch64__) && !defined(__clang__)
+     /* On non-Apple aarch64, we disable this optimization for clang because
+      * on certain mobile chipsets, performance is reduced with clang. For
+      * more information refer to https://github.com/lz4/lz4/pull/707 */
+#    define LZ4_FAST_DEC_LOOP 1
+#  else
+#    define LZ4_FAST_DEC_LOOP 0
+#  endif
+#endif
+
+#if LZ4_FAST_DEC_LOOP
+
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{   /* writes 8 bytes with overlap-aware handling, then hands the remainder to LZ4_wildCopy8() */
+    assert(srcPtr + offset == dstPtr);   /* the source sits exactly offset bytes before the destination */
+    if (offset < 8) {   /* a single 8-byte copy would read bytes it has not written yet */
+        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
+        dstPtr[0] = srcPtr[0];
+        dstPtr[1] = srcPtr[1];
+        dstPtr[2] = srcPtr[2];
+        dstPtr[3] = srcPtr[3];
+        srcPtr += inc32table[offset];   /* per-offset forward adjustment before copying the next 4 bytes */
+        LZ4_memcpy(dstPtr+4, srcPtr, 4);
+        srcPtr -= dec64table[offset];   /* per-offset adjustment applied before the 8-byte loop below */
+        dstPtr += 8;
+    } else {   /* offset >= 8 : the 8 source bytes end at or before dstPtr */
+        LZ4_memcpy(dstPtr, srcPtr, 8);
+        dstPtr += 8;
+        srcPtr += 8;
+    }
+
+    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);   /* always copies at least 8 more bytes (do/while loop) */
+}
+
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd.
+ * This version copies 16 bytes twice (instead of 32 bytes once)
+ * because it must be compatible with offsets >= 16. */
+LZ4_FORCE_INLINE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);   /* do/while : the first 32 bytes are copied unconditionally */
+}
+
+/* LZ4_memcpy_using_offset()  presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there are at least 8 bytes available to write after dstEnd */
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+    BYTE v[8];   /* 8-byte pattern, built once for offsets 1, 2 and 4, then stamped repeatedly */
+
+    assert(dstEnd >= dstPtr + MINMATCH);
+
+    switch(offset) {
+    case 1:   /* pattern is one byte repeated 8 times */
+        MEM_INIT(v, *srcPtr, 8);
+        break;
+    case 2:   /* pattern is a 2-byte unit repeated 4 times */
+        LZ4_memcpy(v, srcPtr, 2);
+        LZ4_memcpy(&v[2], srcPtr, 2);
+        LZ4_memcpy(&v[4], v, 4);
+        break;
+    case 4:   /* pattern is a 4-byte unit repeated twice */
+        LZ4_memcpy(v, srcPtr, 4);
+        LZ4_memcpy(&v[4], srcPtr, 4);
+        break;
+    default:   /* every other offset is delegated to the generic routine */
+        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+        return;
+    }
+
+    LZ4_memcpy(dstPtr, v, 8);   /* first 8 bytes are written unconditionally */
+    dstPtr += 8;
+    while (dstPtr < dstEnd) {   /* whole 8-byte writes only, so the last one may run past dstEnd */
+        LZ4_memcpy(dstPtr, v, 8);
+        dstPtr += 8;
+    }
+}
+#endif
+
+
+/*-************************************
+*  Common functions
+**************************************/
+static unsigned LZ4_NbCommonBytes (reg_t val)
+{   /* builtin paths : (count of trailing zero bits)>>3 on little-endian hosts, (count of leading zero bits)>>3 on big-endian hosts; the MSVC and software fallbacks are presumably equivalent */
+    assert(val != 0);   /* the bit-scan paths below are not meaningful for 0 */
+    if (LZ4_isLittleEndian()) {
+        if (sizeof(val) == 8) {
+#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
+/*-*************************************************************************************************
+* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
+* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
+* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
+****************************************************************************************************/
+#         if defined(__clang__) && (__clang_major__ < 10)
+            /* Avoid undefined clang-cl intrinsics issue.
+             * See https://github.com/lz4/lz4/pull/1017 for details. */
+            return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
+#         else
+            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+            return (unsigned)_tzcnt_u64(val) >> 3;
+#         endif
+#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64(&r, (U64)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_ctzll((U64)val) >> 3;
+#       else
+            const U64 m = 0x0101010101010101ULL;
+            val ^= val - 1;
+            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
+#       endif
+        } else /* 32 bits */ {
+#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward(&r, (U32)val);
+            return (unsigned)r >> 3;
+#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_ctz((U32)val) >> 3;
+#       else
+            const U32 m = 0x01010101;
+            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
+#       endif
+        }
+    } else   /* Big Endian CPU */ {
+        if (sizeof(val)==8) {
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clzll((U64)val) >> 3;
+#       else
+#if 1
+            /* this method is probably faster,
+             * but adds a 128 bytes lookup table */
+            static const unsigned char ctz7_tab[128] = {
+                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+            };
+            U64 const mask = 0x0101010101010101ULL;
+            U64 const t = (((val >> 8) - mask) | val) & mask;
+            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
+#else
+            /* this method doesn't consume memory space like the previous one,
+             * but it contains several branches,
+             * that may end up slowing execution */
+            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
+            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+            Note that this code path is never triggered in 32-bits mode. */
+            unsigned r;
+            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#endif
+#       endif
+        } else /* 32 bits */ {
+#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                                        !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clz((U32)val) >> 3;
+#       else
+            val >>= 8;
+            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+              (val + 0x00FF0000)) >> 24;
+            return (unsigned)val ^ 3;
+#       endif
+        }
+    }
+}
+
+
+#define STEPSIZE sizeof(reg_t)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{   /* returns how many leading bytes at pIn equal those at pMatch; no byte at or beyond pInLimit is read through pIn */
+    const BYTE* const pStart = pIn;
+
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {   /* first word handled apart from the loop : pIn == pStart here */
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);   /* zero when the whole word matches */
+        if (!diff) {
+            pIn+=STEPSIZE; pMatch+=STEPSIZE;
+        } else {
+            return LZ4_NbCommonBytes(diff);   /* mismatch inside the first word : the count is the result as is */
+    }   }
+
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {   /* compare one reg_t-sized word per iteration while a full word fits */
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NbCommonBytes(diff);   /* add the matching bytes of the first differing word */
+        return (unsigned)(pIn - pStart);
+    }
+
+    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }   /* tail : 4 bytes, only when words are 8 bytes wide */
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }   /* tail : 2 bytes */
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;   /* tail : last single byte */
+    return (unsigned)(pIn - pStart);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/*-************************************
+*  Local Constants
+**************************************/
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));   /* 64 KB plus (MFLIMIT-1) bytes */
+static const U32 LZ4_skipTrigger = 6;  /* Increasing this value ==> compression runs slower on incompressible data */
+
+
+/*-************************************
+*  Local Structures and types
+**************************************/
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
+
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict        : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current
+ *                   blob being compressed are valid and refer to the preceding
+ *                   content (of length ctx->dictSize), which is available
+ *                   contiguously preceding in memory the content currently
+ *                   being compressed.
+ * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
+ *                   else in memory, starting at ctx->dictionary with length
+ *                   ctx->dictSize.
+ * - usingDictCtx  : Everything concerning the preceding content is
+ *                   in a separate context, pointed to by ctx->dictCtx.
+ *                   ctx->dictionary, ctx->dictSize, and table entries
+ *                   in the current context that refer to positions
+ *                   preceding the beginning of the current compression are
+ *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ *                   ->dictSize describe the location and size of the preceding
+ *                   content, and matches are found by looking in the ctx
+ *                   ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+
+/*-************************************
+*  Local Utils
+**************************************/
+int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }   /* value of LZ4_VERSION_NUMBER compiled into this translation unit */
+const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }   /* value of LZ4_VERSION_STRING compiled into this translation unit */
+int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }   /* function form of the LZ4_COMPRESSBOUND() macro */
+int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }   /* value of LZ4_STREAMSIZE */
+
+
+/*-****************************************
+*  Internal Definitions, used only in Tests
+*******************************************/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
+
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize);
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*-******************************
+*  Compression functions
+********************************/
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+{   /* multiplicative hash of a 32-bit sequence : keeps the top bits of the 32-bit product */
+    if (tableType == byU16)
+        return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));   /* byU16 : LZ4_HASHLOG+1 result bits */
+    else
+        return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));   /* other table types : LZ4_HASHLOG result bits */
+}
+
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{   /* multiplicative hash over 5 of the 8 bytes of sequence; which 5 depends on host byte order */
+    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;   /* byU16 gets one extra result bit */
+    if (LZ4_isLittleEndian()) {
+        const U64 prime5bytes = 889523592379ULL;
+        return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));   /* << 24 discards the 3 most-significant bytes */
+    } else {
+        const U64 prime8bytes = 11400714785074694791ULL;
+        return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));   /* >> 24 discards the 3 least-significant bytes */
+    }
+}
+
+LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
+{   /* hashes the bytes at p : LZ4_hash5() when reg_t is 8 bytes wide and the table is not byU16, LZ4_hash4() otherwise */
+    if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);   /* reads sizeof(reg_t) bytes at p */
+    return LZ4_hash4(LZ4_read32(p), tableType);   /* reads 4 bytes at p */
+}
+
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{   /* resets slot h of the table to NULL (byPtr) or 0 (byU32, byU16); tableBase is cast to the element type matching tableType */
+    switch (tableType)
+    {
+    default: /* fallthrough */
+    case clearedTable: { /* illegal! */ assert(0); return; }   /* asserts; returns without writing when assert is disabled */
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
+    }
+}
+
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
+{   /* stores idx in slot h; only byU32 and byU16 tables are accepted */
+    switch (tableType)
+    {
+    default: /* fallthrough */
+    case clearedTable: /* fallthrough */
+    case byPtr: { /* illegal! */ assert(0); return; }   /* asserts; returns without writing when assert is disabled */
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }   /* idx must fit in 16 bits */
+    }
+}
+
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+                                  void* tableBase, tableType_t const tableType,
+                            const BYTE* srcBase)
+{   /* records position p in slot h : as a pointer (byPtr) or as the distance p-srcBase (byU32, byU16) */
+    switch (tableType)
+    {
+    case clearedTable: { /* illegal! */ assert(0); return; }   /* asserts; returns without writing when assert is disabled */
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }   /* distance truncated to 32 bits */
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }   /* distance truncated to 16 bits */
+    }
+}
+
+LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{   /* hashes the bytes at p, then records p in the resulting slot */
+    U32 const h = LZ4_hashPosition(p, tableType);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+/* LZ4_getIndexOnHash() :
+ * Returns the index stored in slot h of the hash table.
+ * The matching position must be computed by the caller as base+index, or dictBase+index.
+ * Assumption 1 : only valid if tableType == byU32 or byU16.
+ * Assumption 2 : h is presumed valid (within limits of hash table)
+ */
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
+{
+    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);   /* compile-time check */
+    if (tableType == byU32) {
+        const U32* const hashTable = (const U32*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));   /* slot bound checked for U32 entries */
+        return hashTable[h];
+    }
+    if (tableType == byU16) {
+        const U16* const hashTable = (const U16*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));   /* slot bound checked for U16 entries */
+        return hashTable[h];
+    }
+    assert(0); return 0;  /* forbidden case */
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{   /* returns the position held in slot h : the stored pointer (byPtr), or srcBase plus the stored index (all other table types) */
+    if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
+    { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+                const void* tableBase, tableType_t tableType,
+                const BYTE* srcBase)
+{   /* hashes the bytes at p, then returns the position currently held in that slot */
+    U32 const h = LZ4_hashPosition(p, tableType);
+    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+           const int inputSize,
+           const tableType_t tableType) {
+    /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
+     * therefore safe to use no matter what mode we're in. Otherwise, we figure
+     * out if it's safe to leave as is or whether it needs to be reset.
+     */
+    if ((tableType_t)cctx->tableType != clearedTable) {
+        assert(inputSize >= 0);
+        if ((tableType_t)cctx->tableType != tableType   /* table currently holds another entry type */
+          || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)   /* end offset would reach 0xFFFF */
+          || ((tableType == byU32) && cctx->currentOffset > 1 GB)   /* currentOffset has grown past 1 GB */
+          || tableType == byPtr   /* byPtr tables are always reset */
+          || inputSize >= 4 KB)   /* inputs of 4 KB or more always trigger a reset */
+        {
+            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+            cctx->currentOffset = 0;
+            cctx->tableType = (U32)clearedTable;
+        } else {
+            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+        }
+    }
+
+    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
+     * is faster than compressing without a gap.
+     * However, compressing with currentOffset == 0 is faster still,
+     * so we preserve that case.
+     */
+    if (cctx->currentOffset != 0 && tableType == byU32) {
+        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+        cctx->currentOffset += 64 KB;
+    }
+
+    /* Finally, clear history */
+    cctx->dictCtx = NULL;
+    cctx->dictionary = NULL;
+    cctx->dictSize = 0;
+}
+
+/** LZ4_compress_generic_validated() :
+ *  inlined, to ensure branches are decided at compilation time.
+ *  Presumed already validated at this stage:
+ *  - source != NULL
+ *  - inputSize > 0
+ * @return : nb of bytes written into dest, or 0 when the size/output constraints checked below cannot be met */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const source,
+                 char* const dest,
+                 const int inputSize,
+                 int*  inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int maxOutputSize,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    int result;
+    const BYTE* ip = (const BYTE*) source;   /* current read position */
+
+    U32 const startIndex = cctx->currentOffset;   /* index assigned to source[0] */
+    const BYTE* base = (const BYTE*) source - startIndex;   /* base + index == position in source */
+    const BYTE* lowLimit;   /* backward limit for the catch-up step; also tells which segment a match belongs to (see _next_match) */
+
+    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+    const BYTE* const dictionary =
+        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+    const U32 dictSize =
+        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+    const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with index in current context */
+
+    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);   /* a match may then live outside of source : offset is derived from indexes */
+    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictIssue == dictSmall */
+    const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
+    const BYTE* anchor = (const BYTE*) source;   /* start of the literals not yet emitted */
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    /* the dictCtx currentOffset is indexed on the start of the dictionary,
+     * while a dictionary in the current context precedes the currentOffset */
+    const BYTE* dictBase = (dictionary == NULL) ? NULL :
+                           (dictDirective == usingDictCtx) ?
+                            dictionary + dictSize - dictCtx->currentOffset :
+                            dictionary + dictSize - startIndex;
+
+    BYTE* op = (BYTE*) dest;   /* write cursor */
+    BYTE* const olimit = op + maxOutputSize;   /* end of the output budget; every comparison against it is guarded by outputDirective */
+
+    U32 offset = 0;
+    U32 forwardH;
+
+    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
+    assert(ip != NULL);
+    /* If init conditions are not met, we don't have to mark stream
+     * as having dirty context, since no action was taken yet */
+    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; }  /* Size too large (not within 64K limit) */
+    if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
+    assert(acceleration >= 1);
+
+    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+    /* Update context state */
+    if (dictDirective == usingDictCtx) {
+        /* Subsequent linked blocks can't use the dictionary. */
+        /* Instead, they use the block we just compressed. */
+        cctx->dictCtx = NULL;
+        cctx->dictSize = (U32)inputSize;
+    } else {
+        cctx->dictSize += (U32)inputSize;
+    }
+    cctx->currentOffset += (U32)inputSize;
+    cctx->tableType = (U32)tableType;
+
+    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);   /* search starts at the 2nd byte, with its hash precomputed */
+
+    /* Main Loop */
+    for ( ; ; ) {
+        const BYTE* match;
+        BYTE* token;
+        const BYTE* filledIp;
+
+        /* Find a match */
+        if (tableType == byPtr) {
+            const BYTE* forwardIp = ip;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;   /* drives step : acceleration at first, larger as failed attempts accumulate */
+            do {
+                U32 const h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
+
+                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+
+            } while ( (match+LZ4_DISTANCE_MAX < ip)
+                   || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+        } else {   /* byU32, byU16 */
+
+            const BYTE* forwardIp = ip;
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;   /* same skipping strategy as in the byPtr branch */
+            do {
+                U32 const h = forwardH;
+                U32 const current = (U32)(forwardIp - base);
+                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+                assert(matchIndex <= current);
+                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
+
+                if (dictDirective == usingDictCtx) {
+                    if (matchIndex < startIndex) {
+                        /* there was no match, try the dictionary */
+                        assert(tableType == byU32);
+                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                        match = dictBase + matchIndex;
+                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else if (dictDirective == usingExtDict) {
+                    if (matchIndex < startIndex) {
+                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
+                        assert(startIndex - matchIndex >= MINMATCH);
+                        assert(dictBase);
+                        match = dictBase + matchIndex;
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else {   /* single continuous memory segment */
+                    match = base + matchIndex;
+                }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
+                assert(matchIndex < current);
+                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+                    continue;
+                } /* too far */
+                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */
+
+                if (LZ4_read32(match) == LZ4_read32(ip)) {
+                    if (maybe_extMem) offset = current - matchIndex;
+                    break;   /* match found */
+                }
+
+            } while(1);
+        }
+
+        /* Catch up : extend the match backwards while the preceding bytes are identical */
+        filledIp = ip;
+        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+        /* Encode Literals */
+        {   unsigned const litLength = (unsigned)(ip - anchor);
+            token = op++;   /* reserve the token byte, completed below */
+            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+            }
+            if ((outputDirective == fillOutput) &&
+                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+                op--;   /* give back the reserved token byte */
+                goto _last_literals;
+            }
+            if (litLength >= RUN_MASK) {
+                int len = (int)(litLength - RUN_MASK);
+                *token = (RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy8(op, anchor, op+litLength);
+            op+=litLength;
+            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
+        }
+
+_next_match:
+        /* at this stage, the following variables must be correctly set :
+         * - ip : at start of LZ operation
+         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
+         * - offset : if maybe_extMem==1 (constant)
+         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+         */
+
+        if ((outputDirective == fillOutput) &&
+            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+            /* the match was too close to the end, rewind and go to last literals */
+            op = token;
+            goto _last_literals;
+        }
+
+        /* Encode Offset */
+        if (maybe_extMem) {   /* static test */
+            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
+            LZ4_writeLE16(op, (U16)offset); op+=2;
+        } else  {
+            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
+            assert(ip-match <= LZ4_DISTANCE_MAX);
+            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+        }
+
+        /* Encode MatchLength */
+        {   unsigned matchCode;
+
+            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+              && (lowLimit==dictionary) /* match within extDict */ ) {
+                const BYTE* limit = ip + (dictEnd-match);
+                assert(dictEnd > match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += (size_t)matchCode + MINMATCH;
+                if (ip==limit) {   /* dictionary side exhausted : keep counting from the start of source */
+                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
+                    matchCode += more;
+                    ip += more;
+                }
+                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
+            } else {
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += (size_t)matchCode + MINMATCH;
+                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
+            }
+
+            if ((outputDirective) &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
+                if (outputDirective == fillOutput) {
+                    /* Match description too long : reduce it */
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
+                    ip -= matchCode - newMatchCode;
+                    assert(newMatchCode < matchCode);
+                    matchCode = newMatchCode;
+                    if (unlikely(ip <= filledIp)) {
+                        /* We have already filled up to filledIp so if ip ends up less than filledIp
+                         * we have positions in the hash table beyond the current position. This is
+                         * a problem if we reuse the hash table. So we have to remove these positions
+                         * from the hash table.
+                         */
+                        const BYTE* ptr;
+                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+                        for (ptr = ip; ptr <= filledIp; ++ptr) {
+                            U32 const h = LZ4_hashPosition(ptr, tableType);
+                            LZ4_clearHash(h, cctx->hashTable, tableType);
+                        }
+                    }
+                } else {
+                    assert(outputDirective == limitedOutput);
+                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+                }
+            }
+            if (matchCode >= ML_MASK) {
+                *token += ML_MASK;
+                matchCode -= ML_MASK;
+                LZ4_write32(op, 0xFFFFFFFF);   /* pre-fill 4 length bytes with 255; op only advances over the ones actually needed */
+                while (matchCode >= 4*255) {
+                    op+=4;
+                    LZ4_write32(op, 0xFFFFFFFF);
+                    matchCode -= 4*255;
+                }
+                op += matchCode / 255;
+                *op++ = (BYTE)(matchCode % 255);
+            } else
+                *token += (BYTE)(matchCode);
+        }
+        /* Ensure we have enough space for the last literals. */
+        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
+
+        anchor = ip;   /* the next literal run starts right after the match */
+
+        /* Test end of chunk */
+        if (ip >= mflimitPlusOne) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
+
+        /* Test next position */
+        if (tableType == byPtr) {
+
+            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+            if ( (match+LZ4_DISTANCE_MAX >= ip)
+              && (LZ4_read32(match) == LZ4_read32(ip)) )
+            { token=op++; *token=0; goto _next_match; }   /* immediate match : sequence with 0 literals */
+
+        } else {   /* byU32, byU16 */
+
+            U32 const h = LZ4_hashPosition(ip, tableType);
+            U32 const current = (U32)(ip-base);
+            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if (dictDirective == usingDictCtx) {
+                if (matchIndex < startIndex) {
+                    /* there was no match, try the dictionary */
+                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                    matchIndex += dictDelta;
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;  /* required for match length counter */
+                }
+            } else if (dictDirective==usingExtDict) {
+                if (matchIndex < startIndex) {
+                    assert(dictBase);
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;   /* required for match length counter */
+                }
+            } else {   /* single memory segment */
+                match = base + matchIndex;
+            }
+            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
+              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+                token=op++;
+                *token=0;   /* 0 literals in this sequence */
+                if (maybe_extMem) offset = current - matchIndex;
+                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+                goto _next_match;
+            }
+        }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {   size_t lastRun = (size_t)(iend - anchor);
+        if ( (outputDirective) &&  /* Check output buffer overflow */
+            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+            if (outputDirective == fillOutput) {
+                /* adapt lastRun to fill 'dst' */
+                assert(olimit >= op);
+                lastRun  = (size_t)(olimit-op) - 1/*token*/;
+                lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
+            } else {
+                assert(outputDirective == limitedOutput);
+                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+            }
+        }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
+        if (lastRun >= RUN_MASK) {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        LZ4_memcpy(op, anchor, lastRun);
+        ip = anchor + lastRun;   /* position reached in source; reported through inputConsumed in fillOutput mode */
+        op += lastRun;
+    }
+
+    if (outputDirective == fillOutput) {
+        *inputConsumed = (int) (((const char*)ip)-source);
+    }
+    result = (int)(((char*)op) - dest);
+    assert(result > 0);
+    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
+    return result;
+}
+
+/** LZ4_compress_generic() :
+ *  inlined, so that directive-driven branches are resolved at compilation time;
+ *  rejects out-of-range sizes, emits the empty block for src == (NULL, 0),
+ *  and forwards everything else to LZ4_compress_generic_validated() */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const src,
+                 char* const dst,
+                 const int srcSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int dstCapacity,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+                srcSize, dstCapacity);
+
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }  /* too large, or negative (huge once cast to U32) */
+    if (srcSize != 0) {   /* regular case */
+        assert(src != NULL);
+        return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+                    inputConsumed, /* only written into if outputDirective == fillOutput */
+                    dstCapacity, outputDirective,
+                    tableType, dictDirective, dictIssue, acceleration);
+    }
+    /* srcSize == 0 (src may be NULL) : the whole output is one zero byte */
+    if (outputDirective != notLimited && dstCapacity <= 0) return 0;  /* no output, can't write anything */
+    DEBUGLOG(5, "Generating an empty block");
+    assert(outputDirective == notLimited || dstCapacity >= 1);
+    assert(dst != NULL);
+    dst[0] = 0;
+    if (outputDirective == fillOutput) {
+        assert (inputConsumed != NULL);
+        *inputConsumed = 0;
+    }
+    return 1;
+}
+
+
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t* const stream = LZ4_initStream(state, sizeof(LZ4_stream_t));   /* NULL when state is NULL or not suitably aligned */
+    if (stream == NULL) return 0;   /* unusable state : report failure (0) rather than dereference NULL in release builds */
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {   /* dest can hold the worst case : no output check needed */
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(&stream->internal_donotuse, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(&stream->internal_donotuse, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+    } else {   /* output may be too small : compress under limitedOutput */
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(&stream->internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(&stream->internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * This variant skips the full (expensive) state initialization and relies on
+ * LZ4_prepareTable() instead. It is therefore only safe to call when the state
+ * buffer is known to be correctly initialized already (see the comment on
+ * LZ4_resetStream_fast() in lz4.h for a definition of "correctly initialized").
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    int const fitsWorstCase = (dstCapacity >= LZ4_compressBound(srcSize));   /* when set, output is compressed as notLimited */
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+    if (srcSize < LZ4_64Klimit) {
+        const tableType_t tableType = byU16;
+        LZ4_prepareTable(ctx, srcSize, tableType);
+        if (ctx->currentOffset) {
+            /* table content was kept : dictSmall makes entries older than this block get rejected */
+            if (fitsWorstCase) {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
+            } else {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
+            }
+        }
+        /* currentOffset == 0 : no older entry to filter out */
+        if (fitsWorstCase) {
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        } else {
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+
+    /* srcSize >= LZ4_64Klimit : byU16 is not usable, pick byPtr or byU32 */
+    {   const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        LZ4_prepareTable(ctx, srcSize, tableType);
+        if (fitsWorstCase) {
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        } else {
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+}
+
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    int cSize;   /* value returned by LZ4_compress_fast_extState() */
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* const workState = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (workState == NULL) return 0;
+#else
+    LZ4_stream_t stackState;
+    LZ4_stream_t* const workState = &stackState;
+#endif
+    cSize = LZ4_compress_fast_extState(workState, source, dest, inputSize, maxOutputSize, acceleration);
+
+#if (LZ4_HEAPMODE)
+    FREEMEM(workState);
+#endif
+    return cSize;
+}
+
+
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
+{   int const acceleration = 1;   /* fixed acceleration used by the default entry point */
+    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, acceleration);
+}
+
+
+/* Warning : the stream is left in an unclean/broken state by this function.
+ * Reset it before handing the same state to any _fastReset() or _continue()
+ * call; using it as is would not be safe. */
+static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    void* const s = LZ4_initStream(state, sizeof (*state));
+    assert(s != NULL); (void)s;
+
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+    }
+    if (*srcSizePtr < LZ4_64Klimit) {   /* fill dst as much as possible, 16-bit table */
+        return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+    }
+    {   tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+        return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
+    }
+}
+
+
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+    int cSize;   /* value returned by LZ4_compress_destSize_extState() */
+#if (LZ4_HEAPMODE)
+    LZ4_stream_t* const workState = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (workState == NULL) return 0;
+#else
+    LZ4_stream_t stackState;
+    LZ4_stream_t* const workState = &stackState;
+#endif
+    cSize = LZ4_compress_destSize_extState(workState, src, dst, srcSizePtr, targetDstSize);
+
+#if (LZ4_HEAPMODE)
+    FREEMEM(workState);
+#endif
+    return cSize;
+}
+
+
+
+/*-******************************
+*  Streaming functions
+********************************/
+
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* const stream = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    DEBUGLOG(4, "LZ4_createStream %p", stream);
+    /* a failed allocation is passed through as NULL; otherwise the state is initialized first */
+    if (stream != NULL) { LZ4_initStream(stream, sizeof(*stream)); }
+    return stream;
+}
+
+static size_t LZ4_stream_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    typedef struct { char lead; LZ4_stream_t stream; } probe_t;   /* size excess over LZ4_stream_t == leading char + padding */
+    return sizeof(probe_t) - sizeof(LZ4_stream_t);
+#else
+    return 1;  /* effectively disabled */
+#endif
+}
+
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{
+    DEBUGLOG(5, "LZ4_initStream");
+    /* refuse a buffer that is absent, too small, or not suitably aligned (checked in that order) */
+    if ( (buffer == NULL) || (size < sizeof(LZ4_stream_t))
+      || !LZ4_isAligned(buffer, LZ4_stream_t_alignment()) ) { return NULL; }
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
+    return (LZ4_stream_t*)buffer;
+}
+
+/* LZ4_resetStream() : deprecated, prefer LZ4_initStream() which is more general.
+ * Zeroes the internal state; unlike LZ4_initStream(), the pointer is not checked. */
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));   /* same wipe as performed by LZ4_initStream() */
+}
+
+void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
+    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);   /* input size 0, byU32 : LZ4_prepareTable() decides whether the table is wiped or kept, and clears dictionary history */
+}
+
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    if (LZ4_stream != NULL) {   /* support free on NULL : nothing to log nor release */
+        DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); FREEMEM(LZ4_stream);
+    }
+    return 0;   /* always 0 */
+}
+
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
+    const tableType_t tableType = byU32;
+    const BYTE* const dictEnd = (const BYTE*)dictionary + dictSize;
+    const BYTE* cursor = (const BYTE*)dictionary;
+    const BYTE* base;
+
+    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+    /* The context must be fully reset here,
+     * continuing it with prepareTable() is not enough :
+     * a reset avoids any risk of generating overflowing matchIndex
+     * when compressing using this dictionary */
+    LZ4_resetStream(LZ4_dict);
+
+    /* The offset is always advanced by 64 KB : a longer dict gets truncated
+     * to its last 64k, and for a shorter one we still want to advance by a
+     * whole window length, which guarantees that there are only valid offsets
+     * in the window. That guarantee allows an optimization in
+     * LZ4_compress_fast_continue(), which uses noDictIssue even when the
+     * dictionary isn't a full 64k. */
+    dict->currentOffset += 64 KB;
+
+    /* too small to be indexed : nothing is loaded (the offset bump above is kept) */
+    if (dictSize < (int)HASH_UNIT) { return 0; }
+
+    /* only the last 64 KB are retained */
+    if ((dictEnd - cursor) > 64 KB) { cursor = dictEnd - 64 KB; }
+    base = dictEnd - dict->currentOffset;
+    dict->tableType = (U32)tableType;
+    dict->dictionary = cursor;
+    dict->dictSize = (U32)(dictEnd - cursor);
+
+    /* reference one position out of 3 in the hash table */
+    for ( ; cursor <= dictEnd-HASH_UNIT; cursor += 3) {
+        LZ4_putPosition(cursor, dict->hashTable, tableType, base);
+    }
+
+    return (int)dict->dictSize;
+}
+
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
+{
+    LZ4_stream_t_internal* const working = &workingStream->internal_donotuse;
+    const LZ4_stream_t_internal* dictCtx = (dictionaryStream != NULL) ? &(dictionaryStream->internal_donotuse) : NULL;
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+             workingStream, dictionaryStream,
+             dictCtx != NULL ? dictCtx->dictSize : 0);
+
+    if (dictCtx != NULL) {
+        /* With a current offset of zero, the external dictionary context
+         * would never be looked into, because no value of a table entry
+         * can then indicate a miss. Hence the offset gets bumped to
+         * something non-zero in that case.
+         */
+        if (working->currentOffset == 0) {
+            working->currentOffset = 64 KB;
+        }
+
+        /* An empty dictionary is not attached at all :
+         * the working stream then ends up with no dictCtx. */
+        if (dictCtx->dictSize == 0) {
+            dictCtx = NULL;
+        }
+    }
+    working->dictCtx = dictCtx;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
+{
+    assert(nextSize >= 0);
+    /* indexes only need rescaling when adding nextSize risks a ptrdiff_t overflow (32-bits mode) */
+    if (LZ4_dict->currentOffset + (unsigned)nextSize <= 0x80000000) { return; }
+    {   U32 const shift = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* const histEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int slot;
+        DEBUGLOG(4, "LZ4_renormDictT");
+        for (slot=0; slot<LZ4_HASH_SIZE_U32; slot++) {   /* rescale hash table : entries below the shift are reset to 0 */
+            U32 const entry = LZ4_dict->hashTable[slot];
+            LZ4_dict->hashTable[slot] = (entry < shift) ? 0 : (entry - shift);
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) { LZ4_dict->dictSize = 64 KB; }
+        LZ4_dict->dictionary = histEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+                                const char* source, char* dest,
+                                int inputSize, int maxOutputSize,
+                                int acceleration)
+{
+    const tableType_t tableType = byU32;
+    LZ4_stream_t_internal* const ctx = &LZ4_stream->internal_donotuse;
+    const char* histEnd = NULL;   /* end of the current dictionary; stays NULL when dictSize is 0 */
+    if (ctx->dictSize) { histEnd = (const char*)ctx->dictionary + ctx->dictSize; }
+
+    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, ctx->dictSize);
+
+    LZ4_renormDictT(ctx, inputSize);   /* fix index overflow */
+    if (acceleration < 1) { acceleration = LZ4_ACCELERATION_DEFAULT; }
+    if (acceleration > LZ4_ACCELERATION_MAX) { acceleration = LZ4_ACCELERATION_MAX; }
+
+    /* Tiny dictionaries (< 4 bytes, not enough for a hash) get invalidated,
+     * unless source directly follows the dictionary, or a dictCtx is attached.
+     * Empty inputs are tolerated too : history must not be lost,
+     * in case the next invocation would use prefix mode. */
+    if ( (ctx->dictSize < 4) && (histEnd != source)
+      && (inputSize > 0) && (ctx->dictCtx == NULL) ) {
+        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", ctx->dictSize, ctx->dictionary);
+        /* erase the dictionary from history, so that the faster prefix mode gets employed */
+        ctx->dictSize = 0;
+        ctx->dictionary = (const BYTE*)source;
+        histEnd = source;
+    }
+
+    /* Input overlapping the dictionary : only keep the dictionary bytes located after the input */
+    {   const char* const srcEnd = source + inputSize;
+        if ((srcEnd > (const char*)ctx->dictionary) && (srcEnd < histEnd)) {
+            ctx->dictSize = (U32)(histEnd - srcEnd);
+            if (ctx->dictSize > 64 KB) { ctx->dictSize = 64 KB; }
+            if (ctx->dictSize < 4) { ctx->dictSize = 0; }
+            ctx->dictionary = (const BYTE*)histEnd - ctx->dictSize;
+        }
+    }
+
+    /* prefix mode : source data follows dictionary */
+    if (histEnd == source) {
+        if ((ctx->dictSize < 64 KB) && (ctx->dictSize < ctx->currentOffset)) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
+        }
+        return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
+    }
+
+    /* external dictionary mode */
+    {   int result;
+        if (ctx->dictCtx == NULL) {   /* plain external dictionary, no dictCtx */
+            if ((ctx->dictSize < 64 KB) && (ctx->dictSize < ctx->currentOffset)) {
+                result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+            } else {
+                result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            }
+        } else {
+            /* noDictIssue is safe here even when the dict isn't a full 64 KB :
+             * dictCtx'es (produced by LZ4_loadDict) guarantee that their tables
+             * hold no reference to offsets between dictCtx->currentOffset - 64 KB
+             * and dictCtx->currentOffset - dictCtx->dictSize.
+             */
+            if (inputSize > 4 KB) {
+                /* Large blobs : paying the setup cost of copying the dictionary's
+                 * tables into the active context is faster, since the
+                 * compression loop then only looks into one table.
+                 */
+                LZ4_memcpy(ctx, ctx->dictCtx, sizeof(*ctx));
+                result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            } else {   /* small data <= 4 KB */
+                result = LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+            }
+        }
+        ctx->dictionary = (const BYTE*)source;
+        ctx->dictSize = (U32)inputSize;
+        return result;
+    }
+}
+
+
+/* Hidden debug function, to force-test external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
+{
+    LZ4_stream_t_internal* const ctx = &LZ4_dict->internal_donotuse;
+    int cSize;
+
+    LZ4_renormDictT(ctx, srcSize);
+
+    if ((ctx->dictSize >= 64 KB) || (ctx->dictSize >= ctx->currentOffset)) {
+        cSize = LZ4_compress_generic(ctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    } else {
+        cSize = LZ4_compress_generic(ctx, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
+    }
+
+    /* the block just compressed is now recorded as the stream's dictionary */
+    ctx->dictionary = (const BYTE*)source;
+    ctx->dictSize = (U32)srcSize;
+    return cSize;
+}
+
+
+/*! LZ4_saveDict() :
+ *  Moves the most recent history (up to 64 KB) into `safeBuffer`, for the case where
+ *  the previously compressed block cannot be guaranteed to stay at its memory location.
+ *  Note : the stream references `safeBuffer` afterwards, so no LZ4_loadDict() call is needed;
+ *         LZ4_compress_fast_continue() can be invoked right away.
+ * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ */
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* const ctx = &LZ4_dict->internal_donotuse;
+
+    DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);
+
+    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; }   /* a dictionary > 64 KB would be useless */
+    if ((U32)dictSize > ctx->dictSize) { dictSize = (int)ctx->dictSize; }   /* cannot save more than what exists */
+
+    assert((safeBuffer != NULL) || (dictSize == 0));
+    if (dictSize > 0) {
+        const BYTE* const historyEnd = ctx->dictionary + ctx->dictSize;
+        assert(ctx->dictionary);
+        memmove(safeBuffer, historyEnd - dictSize, dictSize);
+    }
+
+    ctx->dictionary = (const BYTE*)safeBuffer;
+    ctx->dictSize = (U32)dictSize;
+
+    return dictSize;
+}
+
+
+
+/*-*******************************
+ *  Decompression functions
+ ********************************/
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;   /* type of the `endOnInput` parameter of LZ4_decompress_generic() */
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;   /* type of the `partialDecoding` parameter of LZ4_decompress_generic() */
+
+#undef MIN
+#define MIN(a,b)    ( (a) < (b) ? (a) : (b) )   /* beware : the selected argument is evaluated twice */
+
+/* Decodes the variable-length part of a literal or match length (bytes are summed until one is != 255).
+ *
+ * ip - in/out : read position, advanced past every byte consumed.
+ * lencheck - input limit : reaching it (ip >= lencheck) is reported as an error.
+ * loop_check - when set, test ip >= lencheck after each byte read; reports loop_error.
+ * initial_check - when set, test ip >= lencheck before reading; reports initial_error.
+ * error (output) - error code, only written on failure : should be set to 0 (ok) before call.
+ */
+typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
+LZ4_FORCE_INLINE unsigned
+read_variable_length(const BYTE**ip, const BYTE* lencheck,
+                     int loop_check, int initial_check,
+                     variable_length_error* error)
+{
+    U32 total = 0;
+
+    if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+        *error = initial_error;
+        return total;
+    }
+    for (;;) {
+        U32 const lastByte = *(*ip)++;
+        total += lastByte;
+        if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
+            *error = loop_error;
+            return total;
+        }
+        if (lastByte != 255) { break; }   /* any other value ends the sequence */
+    }
+
+    return total;
+}
+
+/*! LZ4_decompress_generic() :
+ *  This generic decompression function covers all use cases,
+ *  and shall be instantiated several times, using different sets of directives.
+ *  Note : for performance, it is important that this function really gets inlined, to remove useless branches.
+ * @return : nb of output bytes decoded (endOnInputSize) or nb of input bytes read (endOnOutputSize); negative value on error.
+ */
+LZ4_FORCE_INLINE int
+LZ4_decompress_generic(
+                 const char* const src,
+                 char* const dst,
+                 int srcSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
+
+                 endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
+                 earlyEnd_directive partialDecoding,  /* full, partial */
+                 dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    if ((src == NULL) || (outputSize < 0)) { return -1; }   /* invalid arguments */
+
+    {   const BYTE* ip = (const BYTE*) src;
+        const BYTE* const iend = ip + srcSize;
+
+        BYTE* op = (BYTE*) dst;
+        BYTE* const oend = op + outputSize;
+        BYTE* cpy;
+
+        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
+
+        const int safeDecode = (endOnInput==endOnInputSize);
+        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+        /* Set up the "end" pointers for the shortcut. */
+        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+        const BYTE* match;
+        size_t offset;
+        unsigned token;
+        size_t length;
+
+
+        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
+
+        /* Special cases */
+        assert(lowPrefix <= op);
+        if ((endOnInput) && (unlikely(outputSize==0))) {
+            /* Empty output buffer */
+            if (partialDecoding) return 0;
+            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
+        }
+        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
+        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
+
+	/* Currently the fast loop shows a regression on qualcomm arm chips. */
+#if LZ4_FAST_DEC_LOOP
+        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+            DEBUGLOG(6, "skip fast decode loop");
+            goto safe_decode;
+        }
+
+        /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
+            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+            if (endOnInput) { assert(ip < iend); }
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+
+                /* copy literals */
+                cpy = op+length;
+                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
+                    LZ4_wildCopy32(op, ip, cpy);
+                } else {   /* LZ4_decompress_fast() */
+                    if (cpy>oend-8) { goto safe_literal_copy; }
+                    LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                                                 * it doesn't know input length, and only relies on end-of-block properties */
+                }
+                ip += length; op = cpy;
+            } else {
+                cpy = op+length;
+                if (endOnInput) {  /* LZ4_decompress_safe() */
+                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
+                    /* We don't need to check oend, since we check it once for each loop below */
+                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
+                    /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
+                    LZ4_memcpy(op, ip, 16);
+                } else {  /* LZ4_decompress_fast() */
+                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+                     * it doesn't know input length, and relies on end-of-block properties */
+                    LZ4_memcpy(op, ip, 8);
+                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
+                }
+                ip += length; op = cpy;
+            }
+
+            /* get offset */
+            offset = LZ4_readLE16(ip); ip+=2;
+            match = op - offset;
+            assert(match <= op);
+
+            /* get matchlength */
+            length = token & ML_MASK;
+
+            if (length == ML_MASK) {
+                variable_length_error error = ok;
+                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+                if (error != ok) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+            } else {
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+
+                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
+                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
+                    if (offset >= 8) {
+                        assert(match >= lowPrefix);
+                        assert(match <= op);
+                        assert(op + 18 <= oend);
+
+                        LZ4_memcpy(op, match, 8);
+                        LZ4_memcpy(op+8, match+8, 8);
+                        LZ4_memcpy(op+16, match+16, 2);
+                        op += length;
+                        continue;
+            }   }   }
+
+            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+            /* match starting within external dictionary */
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) {
+                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+                        length = MIN(length, (size_t)(oend-op));
+                    } else {
+                        goto _output_error;  /* end-of-block condition violated */
+                }   }
+
+                if (length <= (size_t)(lowPrefix-match)) {
+                    /* match fits entirely within external dictionary : just copy */
+                    memmove(op, dictEnd - (lowPrefix-match), length);
+                    op += length;
+                } else {
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
+                        BYTE* const endOfMatch = op + restSize;
+                        const BYTE* copyFrom = lowPrefix;
+                        while (op < endOfMatch) { *op++ = *copyFrom++; }
+                    } else {
+                        LZ4_memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                }   }
+                continue;
+            }
+
+            /* copy match within block */
+            cpy = op + length;
+
+            assert((op <= oend) && (oend-op >= 32));
+            if (unlikely(offset<16)) {
+                LZ4_memcpy_using_offset(op, match, cpy, offset);
+            } else {
+                LZ4_wildCopy32(op, match, cpy);
+            }
+
+            op = cpy;   /* wildcopy correction */
+        }
+    safe_decode:
+#endif
+
+        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+        while (1) {
+            token = *ip++;
+            length = token >> ML_BITS;  /* literal length */
+
+            assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+            /* A two-stage shortcut for the most common case:
+             * 1) If the literal length is 0..14, and there is enough space,
+             * enter the shortcut and copy 16 bytes on behalf of the literals
+             * (in the fast mode, only 8 bytes can be safely copied this way).
+             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+             * manner; but we ensure that there's enough space in the output for
+             * those 18 bytes earlier, upon entering the shortcut (in other words,
+             * there is a combined check for both stages).
+             */
+            if ( (endOnInput ? length != RUN_MASK : length <= 8)
+                /* strictly "less than" on input, to re-enter the loop with at least one byte */
+              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+                /* Copy the literals */
+                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
+                op += length; ip += length;
+
+                /* The second stage: prepare for match copying, decode full info.
+                 * If it doesn't work out, the info won't be wasted. */
+                length = token & ML_MASK; /* match length */
+                offset = LZ4_readLE16(ip); ip += 2;
+                match = op - offset;
+                assert(match <= op); /* check overflow */
+
+                /* Do not deal with overlapping matches. */
+                if ( (length != ML_MASK)
+                  && (offset >= 8)
+                  && (dict==withPrefix64k || match >= lowPrefix) ) {
+                    /* Copy the match. */
+                    LZ4_memcpy(op + 0, match + 0, 8);
+                    LZ4_memcpy(op + 8, match + 8, 8);
+                    LZ4_memcpy(op +16, match +16, 2);
+                    op += length + MINMATCH;
+                    /* Both stages worked, load the next token. */
+                    continue;
+                }
+
+                /* The second stage didn't work out, but the info is ready.
+                 * Propel it right to the point of match copying. */
+                goto _copy_match;
+            }
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                variable_length_error error = ok;
+                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+                if (error == initial_error) { goto _output_error; }
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+            }
+
+            /* copy literals */
+            cpy = op+length;
+#if LZ4_FAST_DEC_LOOP
+        safe_literal_copy:
+#endif
+            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
+              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+            {
+                /* We've either hit the input parsing restriction or the output parsing restriction.
+                 * In the normal scenario, decoding a full block, it must be the last sequence,
+                 * otherwise it's an error (invalid input or dimensions).
+                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
+                 */
+                if (partialDecoding) {
+                    /* Since we are partial decoding we may be in this block because of the output parsing
+                     * restriction, which is not valid since the output buffer is allowed to be undersized.
+                     */
+                    assert(endOnInput);
+                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of input.
+                     */
+                    if (ip+length > iend) {
+                        length = (size_t)(iend-ip);
+                        cpy = op + length;
+                    }
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of output space.
+                     */
+                    if (cpy > oend) {
+                        cpy = oend;
+                        assert(op<=oend);
+                        length = (size_t)(oend-op);
+                    }
+                } else {
+                    /* We must be on the last sequence because of the parsing limitations so check
+                     * that we exactly regenerate the original size (must be exact when !endOnInput).
+                     */
+                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
+                     /* We must be on the last sequence (or invalid) because of the parsing limitations
+                      * so check that we exactly consume the input and don't overrun the output buffer.
+                      */
+                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
+                        DEBUGLOG(6, "should have been last run of literals")
+                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+                        goto _output_error;
+                    }
+                }
+                memmove(op, ip, length);  /* supports overlapping memory regions; only matters for in-place decompression scenarios */
+                ip += length;
+                op += length;
+                /* Necessarily EOF when !partialDecoding.
+                 * When partialDecoding, it is EOF if we've either
+                 * filled the output buffer or
+                 * can't proceed with reading an offset for following match.
+                 */
+                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
+                    break;
+                }
+            } else {
+                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+                ip += length; op = cpy;
+            }
+
+            /* get offset */
+            offset = LZ4_readLE16(ip); ip+=2;
+            match = op - offset;
+
+            /* get matchlength */
+            length = token & ML_MASK;
+
+    _copy_match:
+            if (length == ML_MASK) {
+              variable_length_error error = ok;
+              length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+              if (error != ok) goto _output_error;
+                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
+            }
+            length += MINMATCH;
+
+#if LZ4_FAST_DEC_LOOP
+        safe_match_copy:
+#endif
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+            /* match starting within external dictionary */
+            if ((dict==usingExtDict) && (match < lowPrefix)) {
+                if (unlikely(op+length > oend-LASTLITERALS)) {
+                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+                    else goto _output_error;   /* doesn't respect parsing restriction */
+                }
+
+                if (length <= (size_t)(lowPrefix-match)) {
+                    /* match fits entirely within external dictionary : just copy */
+                    memmove(op, dictEnd - (lowPrefix-match), length);
+                    op += length;
+                } else {
+                    /* match stretches into both external dictionary and current block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
+                        BYTE* const endOfMatch = op + restSize;
+                        const BYTE* copyFrom = lowPrefix;
+                        while (op < endOfMatch) *op++ = *copyFrom++;
+                    } else {
+                        LZ4_memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                }   }
+                continue;
+            }
+            assert(match >= lowPrefix);
+
+            /* copy match within block */
+            cpy = op + length;
+
+            /* partialDecoding : may end anywhere within the block */
+            assert(op<=oend);
+            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                size_t const mlen = MIN(length, (size_t)(oend-op));
+                const BYTE* const matchEnd = match + mlen;
+                BYTE* const copyEnd = op + mlen;
+                if (matchEnd > op) {   /* overlap copy */
+                    while (op < copyEnd) { *op++ = *match++; }
+                } else {
+                    LZ4_memcpy(op, match, mlen);
+                }
+                op = copyEnd;
+                if (op == oend) { break; }
+                continue;
+            }
+
+            if (unlikely(offset<8)) {
+                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
+                op[0] = match[0];
+                op[1] = match[1];
+                op[2] = match[2];
+                op[3] = match[3];
+                match += inc32table[offset];
+                LZ4_memcpy(op+4, match, 4);
+                match -= dec64table[offset];
+            } else {
+                LZ4_memcpy(op, match, 8);
+                match += 8;
+            }
+            op += 8;
+
+            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
+                if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+                if (op < oCopyLimit) {
+                    LZ4_wildCopy8(op, match, oCopyLimit);
+                    match += oCopyLimit - op;
+                    op = oCopyLimit;
+                }
+                while (op < cpy) { *op++ = *match++; }
+            } else {
+                LZ4_memcpy(op, match, 8);
+                if (length > 16)  { LZ4_wildCopy8(op+8, match+8, cpy); }
+            }
+            op = cpy;   /* wildcopy correction */
+        }
+
+        /* end of decoding */
+        if (endOnInput) {
+            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
+           return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
+       } else {
+           return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
+       }
+
+        /* Overflow error detected */
+    _output_error:
+        return (int) (-(((const char*)ip)-src))-1;   /* negative : -(input position reached when the error was detected) - 1 */
+    }
+}
+
+
+/*===== Instantiate the API decoding functions. =====*/
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    /* full block decoding, bounded by input size, no dictionary : lowPrefix is dest itself */
+    BYTE* const lowPrefix = (BYTE*)dest;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, decode_full_block, noDict, lowPrefix, NULL, 0);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
+{
+    /* the output bound handed to the decoder is the smaller of targetOutputSize and dstCapacity */
+    if (targetOutputSize < dstCapacity) { dstCapacity = targetOutputSize; }
+    return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
+                                  endOnInputSize, partial_decode, noDict, (BYTE*)dst, NULL, 0);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    /* decoding bounded by output size only : srcSize is passed as 0, lowPrefix sits 64 KB before dest */
+    BYTE* const lowPrefix = (BYTE*)dest - 64 KB;
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, decode_full_block, withPrefix64k, lowPrefix, NULL, 0);
+}
+
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+LZ4_FORCE_O2 /* Exported, an obsolete API function: the safe decoder instantiated with withPrefix64k instead of noDict. */
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);   /* prefix start is taken 64 KB before dest; no external dictionary */
+}
+
+/* Obsolete API function as well; it is the counterpart of the one just above. */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    /* Match offsets are not validated by LZ4_decompress_fast(), which is why it copes with any prefixed dictionary as-is. */
+    int const result = LZ4_decompress_fast(source, dest, originalSize);
+    return result;
+}
+
+LZ4_FORCE_O2 /* Safe decoder for a prefix of prefixSize bytes located right before dest; uses noDict mode with the prefix start moved back. */
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, noDict,
+                                  (BYTE*)dest-prefixSize, NULL, 0);   /* prefix start = dest - prefixSize; no external dictionary */
+}
+
+LZ4_FORCE_O2 /* Safe decoder in usingExtDict mode: the dictionary is supplied separately as dictStart + dictSize, and the prefix start is dest. */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);   /* dictStart is only re-typed to const BYTE*, never offset */
+}
+
+LZ4_FORCE_O2 /* "Fast" (endOnOutputSize) decoder in usingExtDict mode; as in LZ4_decompress_fast(), 0 is passed where the compressed size would go. */
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);   /* prefix start is dest; dictionary is dictStart + dictSize */
+}
+
+/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix (dest - prefixSize), and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+LZ4_FORCE_INLINE /* safe variant: bounded by compressedSize on input and maxOutputSize on output */
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);   /* both a prefix and an external dictionary are supplied */
+}
+
+LZ4_FORCE_INLINE /* "fast" (endOnOutputSize) variant of the double-dictionary decoder: prefix before dest plus an external dictionary */
+int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, decode_full_block, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);   /* 0 is passed where the compressed size would go */
+}
+
+/*===== streaming decompression functions =====*/
+
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
+{   /* Hands back whatever ALLOC_AND_ZERO() produced for one tracking context; the pointer is not checked here. */
+    LZ4_streamDecode_t* const ctx = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(*ctx));
+    LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal));    /* Failing to compile here means LZ4_STREAMDECODESIZE is too small */
+    return ctx;
+}
+
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
+{
+    /* Freeing NULL is supported: nothing is released then. The result is 0 in every case. */
+    if (LZ4_stream != NULL) { FREEMEM(LZ4_stream); }
+    return 0;
+}
+
+/*! LZ4_setStreamDecode() :
+ *  Use this function to instruct where to find the dictionary.
+ *  This function is not necessary if previous data is still available where it was decoded.
+ *  Loading a size of 0, or a NULL dictionary, is allowed (same effect as no dictionary).
+ *  On success, the external-dictionary fields of the context are cleared.
+ * @return : 1 if OK, 0 if error (negative dictSize; the context is then left untouched)
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    /* A negative size would wrap into a huge size_t prefix once cast: reject it. */
+    if (dictSize < 0) { return 0; }
+    /* NULL means "reset": force the size to 0, so that no offset is ever applied to a null pointer. */
+    if (dictionary == NULL) { dictSize = 0; }
+    lz4sd->prefixSize = (size_t)dictSize;
+    lz4sd->prefixEnd = (dictSize != 0) ? (const BYTE*) dictionary + dictSize : (const BYTE*) dictionary;
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize  = 0;
+    return 1;
+}
+
+/*! LZ4_decoderRingBufferSize() :
+ *  Optional helper for streaming decompression into a ring buffer:
+ *  computes the smallest ring buffer size that remains compatible
+ *  with any source whose blocks respect the maxBlockSize condition.
+ *  Note : in that scenario, blocks are presumed decoded back to back;
+ *  once the space left cannot hold another block (remainingSize < maxBlockSize),
+ *  decoding restarts from the beginning of the ring buffer.
+ *  Values of maxBlockSize below 16 are treated as 16.
+ * @return : minimum ring buffer size, or 0 if maxBlockSize is invalid
+ *           (negative, or larger than LZ4_MAX_INPUT_SIZE).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    int const outOfRange = (maxBlockSize < 0) || (maxBlockSize > LZ4_MAX_INPUT_SIZE);
+    if (outOfRange) { return 0; }
+    /* small block sizes are rounded up to 16 before applying the sizing macro */
+    return LZ4_DECODER_RING_BUFFER_SIZE((maxBlockSize < 16) ? 16 : maxBlockSize);
+}
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode().
+    A decoder result <= 0 is returned immediately, without updating prefixSize / prefixEnd.
+*/
+LZ4_FORCE_O2
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    int result;   /* value returned by whichever decoder variant gets selected below */
+
+    if (lz4sd->prefixSize == 0) {
+        /* The first call, no dictionary yet. */
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = (size_t)result;   /* the block just decoded becomes the whole prefix */
+        lz4sd->prefixEnd = (BYTE*)dest + result;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        /* They're rolling the current segment: dest starts exactly where the recorded prefix ends. */
+        if (lz4sd->prefixSize >= 64 KB - 1)
+            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        else if (lz4sd->extDictSize == 0)   /* short prefix, and no external dictionary recorded */
+            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+                                                         lz4sd->prefixSize);
+        else   /* short prefix, plus a recorded external dictionary */
+            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += (size_t)result;   /* the prefix grows by the newly decoded bytes */
+        lz4sd->prefixEnd  += result;
+    } else {
+        /* The buffer wraps around, or they're switching to another buffer. */
+        lz4sd->extDictSize = lz4sd->prefixSize;   /* the former prefix is recorded as the external dictionary (done before decoding, so kept even on failure) */
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+                                                  lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = (size_t)result;   /* a new prefix starts over at dest */
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+LZ4_FORCE_O2 /* Streaming counterpart of LZ4_decompress_fast(): same three cases as the safe variant, with the prefix tracked in units of originalSize. */
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+    int result;   /* decoder return value; the bookkeeping below relies on originalSize instead */
+    assert(originalSize >= 0);
+
+    if (lz4sd->prefixSize == 0) {   /* first block: no prefix recorded yet */
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_fast(source, dest, originalSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = (size_t)originalSize;
+        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {   /* dest starts exactly where the recorded prefix ends */
+        if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
+            result = LZ4_decompress_fast(source, dest, originalSize);
+        else   /* short prefix, plus a recorded external dictionary */
+            result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += (size_t)originalSize;   /* the prefix grows by the decoded size */
+        lz4sd->prefixEnd  += originalSize;
+    } else {   /* dest is elsewhere: the former prefix is recorded as the external dictionary before decoding */
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
+                                             lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = (size_t)originalSize;   /* a new prefix starts over at dest */
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    except that the dictionary must be explicitly provided within parameters
+*/
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{   /* Picks a decoder from the dictionary: none, ending exactly at dest (prefix), or located elsewhere (external). */
+    if (dictSize != 0) {
+        int const dictIsPrefix = (dictStart+dictSize == dest);
+        if (!dictIsPrefix) {
+            assert(dictSize >= 0);
+            return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
+        }
+        if (dictSize >= 64 KB - 1) { return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); }
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
+    }
+    return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{   /* The external-dictionary decoder is only needed for a non-empty dictionary that does not end at dest. */
+    int const needsExtDict = (dictSize != 0) && (dictStart+dictSize != dest);
+    if (!needsExtDict) { return LZ4_decompress_fast(source, dest, originalSize); }
+    assert(dictSize >= 0);
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
+}
+
+
+/*=*************************************************
+*  Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{   int const cSize = LZ4_compress_default(source, dest, inputSize, maxOutputSize);   /* legacy name: arguments are forwarded untouched */
+    return cSize;
+}
+int LZ4_compress(const char* src, char* dest, int srcSize)
+{   int const dstBound = LZ4_compressBound(srcSize);   /* no capacity parameter here: LZ4_compressBound(srcSize) is passed as dest's capacity */
+    return LZ4_compress_default(src, dest, srcSize, dstBound);
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{   int const acceleration = 1;   /* this legacy entry point has no acceleration argument: 1 is always used */
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, acceleration);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{   int const dstBound = LZ4_compressBound(srcSize);   /* dst capacity is taken to be LZ4_compressBound(srcSize); acceleration is fixed to 1 */
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstBound, 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{   int const acceleration = 1;   /* legacy streaming entry point: acceleration is always 1 */
+    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, acceleration);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{   int const dstBound = LZ4_compressBound(inputSize);   /* dest capacity is taken to be LZ4_compressBound(inputSize); acceleration is fixed to 1 */
+    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, dstBound, 1);
+}
+
+/*
+These decompression functions are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{   int const result = LZ4_decompress_fast(source, dest, outputSize);   /* deprecated alias: same arguments, same result */
+    return result;
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{   int const result = LZ4_decompress_safe(source, dest, isize, maxOutputSize);   /* deprecated alias: isize is handed over as the compressed size */
+    return result;
+}
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState(void) { int const stateSize = LZ4_STREAMSIZE; return stateSize; }   /* reports the LZ4_STREAMSIZE constant */
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)
+{   LZ4_stream_t* const stream = (LZ4_stream_t*)state;
+    (void)inputBuffer;   /* parameter is not used by this implementation */
+    LZ4_resetStream(stream);
+    return 0;   /* always 0 */
+}
+
+void* LZ4_create (char* inputBuffer)
+{   LZ4_stream_t* const newStream = LZ4_createStream();
+    (void)inputBuffer;   /* parameter is not used by this implementation */
+    return newStream;    /* result of LZ4_createStream(), handed back as an untyped pointer */
+}
+
+char* LZ4_slideInputBuffer (void* state)
+{   LZ4_stream_t* const stream = (LZ4_stream_t*)state;
+    /* going through uptrval avoids the const char * -> char * conversion warning */
+    return (char *)(uptrval)stream->internal_donotuse.dictionary;
+}
+
+#endif   /* LZ4_COMMONDEFS_ONLY */
diff --git a/include/extern/external/lz4.h b/include/extern/external/lz4.h
new file mode 100644
index 0000000..7c401f6
--- /dev/null
+++ b/include/extern/external/lz4.h
@@ -0,0 +1,785 @@
+/*
+ *  LZ4 - Fast LZ compression algorithm
+ *  Header File
+ *  Copyright (C) 2011-2020, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h>   /* size_t */
+
+
+/**
+  Introduction
+
+  LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core,
+  scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
+  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+  The LZ4 compression library provides in-memory compression and decompression functions.
+  It gives full buffer control to user.
+  Compression can be done in:
+    - a single step (described as Simple Functions)
+    - a single step, reusing a context (described in Advanced Functions)
+    - unbounded multiple steps (described as Streaming compression)
+
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing such a compressed block requires additional metadata.
+  Exact metadata depends on exact decompression function.
+  For the typical case of LZ4_decompress_safe(),
+  metadata includes block's compressed size, and maximum bound of decompressed size.
+  Each application is free to encode and pass such metadata in whichever way it wants.
+
+  lz4.h only handles blocks; it cannot generate Frames.
+
+  Blocks are different from Frames (doc/lz4_Frame_format.md).
+  Frames bundle both blocks and metadata in a specified manner.
+  Embedding metadata is required for compressed data to be self-contained and portable.
+  Frame format is delivered through a companion API, declared in lz4frame.h.
+  The `lz4` CLI can only manage frames.
+*/
+
+/*^***************************************************************
+*  Export parameters
+*****************************************************************/
+/*
+*  LZ4_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*  LZ4LIB_VISIBILITY :
+*  Control library symbols visibility.
+*/
+#ifndef LZ4LIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4LIB_VISIBILITY
+#  endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+#  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+#  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define LZ4LIB_API LZ4LIB_VISIBILITY
+#endif
+
+/*------   Version   ------*/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+#define LZ4_QUOTE(str) #str
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
+
+LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version */
+
+
+/*-************************************
+*  Tuning parameter
+**************************************/
+#define LZ4_MEMORY_USAGE_MIN 10
+#define LZ4_MEMORY_USAGE_DEFAULT 14
+#define LZ4_MEMORY_USAGE_MAX 20
+
+/*!
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; )
+ * Increasing memory usage improves compression ratio, at the cost of speed.
+ * Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality.
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT
+#endif
+
+#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN)
+#  error "LZ4_MEMORY_USAGE is too small !"
+#endif
+
+#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX)
+#  error "LZ4_MEMORY_USAGE is too large !"
+#endif
+
+/*-************************************
+*  Simple Functions
+**************************************/
+/*! LZ4_compress_default() :
+ *  Compresses 'srcSize' bytes from buffer 'src'
+ *  into already allocated 'dst' buffer of size 'dstCapacity'.
+ *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ *  It also runs faster, so it's a recommended setting.
+ *  If the function cannot compress 'src' into a more limited 'dst' budget,
+ *  compression stops *immediately*, and the function result is zero.
+ *  In which case, 'dst' content is undefined (invalid).
+ *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ *      dstCapacity : size of buffer 'dst' (which must be already allocated)
+ *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ *                or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor reads outside 'src' buffer).
+ */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
+
+/*! LZ4_decompress_safe() :
+ *  compressedSize : is the exact complete size of the compressed block.
+ *  dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ *          it will never write outside 'dst' buffer, nor read outside 'src' buffer,
+ *          even if the compressed block is maliciously modified to order the decoder to do these actions.
+ *          In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ *          The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ *          If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*! LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is incorrect (too large or negative)
+*/
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*! LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
+*/
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_fast_extState() :
+ *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ *  Use LZ4_sizeofState() to know how much memory must be allocated,
+ *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ *  Then, provide this buffer as `void* state` to compression function.
+ */
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_destSize() :
+ *  Reverse the logic : compresses as much data as possible from 'src' buffer
+ *  into already allocated buffer 'dst', of size >= 'targetDstSize'.
+ *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ *  or fill 'dst' buffer completely with as much data as possible from 'src'.
+ *  note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ *               New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ *        the produced compressed content could, in specific circumstances,
+ *        require to be decompressed into a destination buffer larger
+ *        by at least 1 byte than the content to decompress.
+ *        If an application uses `LZ4_compress_destSize()`,
+ *        it's highly recommended to update liblz4 to v1.9.2 or better.
+ *        If this can't be done or ensured,
+ *        the receiving decompression function should provide
+ *        a dstCapacity which is > decompressedSize, by at least 1 byte.
+ *        See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+
+/*! LZ4_decompress_safe_partial() :
+ *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ *  into destination buffer 'dst' of size 'dstCapacity'.
+ *  Up to 'targetOutputSize' bytes will be decoded.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ *           If source stream is detected malformed, function returns a negative result.
+ *
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ *           so dstCapacity is kind of redundant.
+ *           This is because in older versions of this function,
+ *           decoding operation would still write complete sequences.
+ *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
+ *           it could write more bytes, though only up to dstCapacity.
+ *           Some "margin" used to be required for this operation to work properly.
+ *           Thankfully, this is no longer necessary.
+ *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will, at most, generate block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than block's compressed size,
+ *           then targetOutputSize **MUST** be <= block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+
+
+/*-*********************************************
+*  Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
+
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_resetStream_fast() : v1.9.0+
+ *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ *  (e.g., LZ4_compress_fast_continue()).
+ *
+ *  An LZ4_stream_t must be initialized once before usage.
+ *  This is automatically done when created by LZ4_createStream().
+ *  However, should the LZ4_stream_t be simply declared on stack (for example),
+ *  it's necessary to initialize it first, using LZ4_initStream().
+ *
+ *  After init, start any new stream with LZ4_resetStream_fast().
+ *  A same LZ4_stream_t can be re-used multiple times consecutively
+ *  and compress multiple streams,
+ *  provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ *  but is not compatible with memory regions containing garbage data.
+ *
+ *  Note: it's only useful to call LZ4_resetStream_fast()
+ *        in the context of streaming compression.
+ *        The *extState* functions perform their own resets.
+ *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ *  Use this function to reference a static dictionary into LZ4_stream_t.
+ *  The dictionary must remain available during compression.
+ *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ *  The same dictionary will have to be loaded on decompression side for successful decoding.
+ *  Dictionaries are useful for better compression of small data (KB range).
+ *  While LZ4 accepts any input as dictionary,
+ *  results are generally better when using Zstandard's Dictionary Builder.
+ *  Loading a size of 0 is allowed, and is the same as reset.
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
+ * 'dst' buffer must be already allocated.
+ *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ * @return : size of compressed block
+ *           or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ *           Each block has precise boundaries.
+ *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
+ *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *
+ *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
+ *
+ *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ *           Make sure that buffers are separated, by at least one byte.
+ *           This construction ensures that each block only depends on previous block.
+ *
+ *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_saveDict() :
+ *  If the last 64KB of data cannot be guaranteed to remain available at its current memory location,
+ *  save it into a safer place (char* safeBuffer).
+ *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+
+
+/*-**********************************************
+*  Streaming Decompression Functions
+*  Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context; the union itself is defined in the private section of this header */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ *  Creation / destruction of a streaming decompression tracking context.
+ *  A tracking context can be re-used multiple times.
+ */
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*! LZ4_setStreamDecode() :
+ *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+ *  Use this function to start decompression of a new stream of blocks.
+ *  A dictionary can optionally be set. Pass NULL or a size of 0 to reset the context without a dictionary.
+ *  The dictionary is presumed stable : it must remain accessible and unmodified during the next decompression.
+ * @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
+ *  Note : in a ring buffer scenario (optional),
+ *  blocks are presumed decompressed next to each other
+ *  up to the moment there is not enough remaining space for the next block (remainingSize < maxBlockSize),
+ *  at which stage decoding resumes from the beginning of the ring buffer.
+ *  When setting up such a ring buffer for streaming decompression,
+ *  this function provides the minimum size of the ring buffer
+ *  that is compatible with any source respecting the maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
+
+/*! LZ4_decompress_*_continue() :
+ *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ *  A block is an unsplittable entity : it must be presented entirely to a decompression function.
+ *  Decompression functions only accept one block at a time.
+ *  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ *  If less than 64KB of data has been decoded, all the data must be present.
+ *
+ *  Special : if the decompression side sets a ring buffer, it must respect one of the following conditions :
+ *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized.
+ *    Actually, data can be produced by any source compliant with the LZ4 format specification, and respecting maxBlockSize.
+ *  - Synchronized mode :
+ *    Decompression buffer size is _exactly_ the same as compression buffer size,
+ *    and follows exactly the same update rule (block boundaries at the same positions),
+ *    and the decoding function is provided with the exact decompressed size of each block (exception for the last block of the stream),
+ *    _then_ decoding & encoding ring buffers can have any size, including small ones ( < 64 KB).
+ *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and the encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *
+ *  Whenever these conditions are not possible,
+ *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing the next block.
+*/
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ *  These decoding functions work the same as
+ *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue().
+ *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ *  The dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+ *  Performance tip : Decompression speed can be substantially increased
+ *                    when dst == dictStart + dictSize.
+ */
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
+
+#endif /* LZ4_H_2983827168210 */
+
+
+/*^*************************************
+ * !!!!!!   STATIC LINKING ONLY   !!!!!!
+ ***************************************/
+
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+#ifndef LZ4_STATIC_3504398509
+#define LZ4_STATIC_3504398509
+
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4LIB_STATIC_API LZ4LIB_API   /* experimental symbols are decorated exactly like the stable API */
+#else
+#define LZ4LIB_STATIC_API              /* expands to nothing: no decoration on experimental symbols */
+#endif
+
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ *  A variant of LZ4_compress_fast_extState().
+ *
+ *  Using this variant avoids an expensive initialization step.
+ *  It is only safe to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+ *  From a high level, the difference is that
+ *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+ *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ *  This is an experimental API that allows
+ *  efficient use of a static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDict() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionaryStream may be NULL,
+ *  in which case any existing dictionary stream is unset.
+ *
+ *  If a dictionary is provided, it replaces any pre-existing stream history.
+ *  The dictionary contents are the only history that can be referenced and
+ *  logically immediately precede the data compressed in the first subsequent
+ *  compression call.
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the completion of the first compression call on the stream.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
+
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output share the same buffer,
+ * for highly constrained memory environments.
+ * In both cases, it requires the input to lie at the end of the buffer,
+ * and the output to be written from the beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than the final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ *                             |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ *                                                  |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * whose size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded the data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ *   Note that it is a compile-time constant, so all compressions will apply this limit.
+ *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ *   so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ *   in which case, the return code will be 0 (zero).
+ *   The caller must be ready for these cases to happen,
+ *   and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * whose size is >= LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize),
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by lowering either of them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)   /* compressedSize/256 + 32 */
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize. note2: the margin is slightly overestimated, since it is computed from decompressedSize where compressedSize would suffice */
+
+#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
+#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
+#endif
+
+#define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression fails (return code 0) */
+
+#endif   /* LZ4_STATIC_3504398509 */
+#endif   /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
+
+/*-************************************************************
+ *  Private Definitions
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)         /* log2 of LZ4_HASH_SIZE_U32 */
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)  /* equals 4 * LZ4_HASH_SIZE_U32; presumably the hash table size in bytes -- confirm */
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* number of LZ4_u32 entries in hashTable[]; required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  int8_t  LZ4_i8;      /* exact-width types when <stdint.h> is available */
+  typedef uint8_t  LZ4_byte;
+  typedef uint16_t LZ4_u16;
+  typedef uint32_t LZ4_u32;
+#else   /* neither C++ nor C99: fall back on basic types, presumed to be 8, 16 and 32 bits wide */
+  typedef   signed char  LZ4_i8;
+  typedef unsigned char  LZ4_byte;
+  typedef unsigned short LZ4_u16;
+  typedef unsigned int   LZ4_u32;
+#endif
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;   /* declared first: the struct points to its own type via dictCtx */
+struct LZ4_stream_t_internal {
+    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];   /* LZ4_HASH_SIZE_U32 entries of 32 bits */
+    LZ4_u32 currentOffset;
+    LZ4_u32 tableType;
+    const LZ4_byte* dictionary;             /* read-only; presumably the dictionary / history bytes -- confirm in lz4.c */
+    const LZ4_stream_t_internal* dictCtx;   /* read-only; presumably the stream attached by LZ4_attach_dictionary() -- confirm */
+    LZ4_u32 dictSize;                       /* presumably the size of `dictionary`, in bytes -- confirm */
+};
+
+typedef struct {
+    const LZ4_byte* externalDict;   /* presumably the dictionary passed to LZ4_setStreamDecode() -- confirm in lz4.c */
+    size_t extDictSize;             /* presumably the size of externalDict, in bytes -- confirm */
+    const LZ4_byte* prefixEnd;      /* presumably one past the last previously decoded byte -- confirm */
+    size_t prefixSize;              /* presumably the amount of decoded data ending at prefixEnd, in bytes -- confirm */
+} LZ4_streamDecode_t_internal;
+
+
+/*! LZ4_stream_t :
+ *  Do not use the internal definitions below directly !
+ *  Declare or allocate an LZ4_stream_t instead.
+ *  LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ *  The structure definition can be convenient for static allocation
+ *  (on stack, or as part of a larger structure).
+ *  Init this structure with LZ4_initStream() before first use.
+ *  note : only use this definition in association with static linking !
+ *  this definition is not API/ABI safe, and may change in future versions.
+ */
+#define LZ4_STREAMSIZE       ((1UL << LZ4_MEMORY_USAGE) + 32)  /* static size in bytes, for inter-version compatibility */
+#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))  /* the same size, counted in void* elements */
+union LZ4_stream_u {
+    void* table[LZ4_STREAMSIZE_VOIDP];        /* reserves the static size and gives the union pointer alignment */
+    LZ4_stream_t_internal internal_donotuse;  /* actual state; private to the library */
+}; /* previously typedef'd to LZ4_stream_t */
+
+
+/*! LZ4_initStream() : v1.9.0+
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is automatically done when invoking LZ4_createStream(),
+ *  but it's not when the structure is simply declared on stack (for example).
+ *
+ *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ *  It can also initialize any arbitrary buffer of sufficient size,
+ *  and will @return a pointer of proper type upon initialization.
+ *
+ *  Note : initialization fails if size and alignment conditions are not respected.
+ *         In which case, the function will @return NULL.
+ *  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ *  Note3: Before v1.9.0, use LZ4_resetStream() instead
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+
+
+/*! LZ4_streamDecode_t :
+ *  Information structure to track an LZ4 stream during decompression.
+ *  Init this structure using LZ4_setStreamDecode() before first use.
+ *  note : only use in association with static linking !
+ *         this definition is not API/ABI safe,
+ *         and may change in a future version !
+ */
+#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )   /* element count of table[]: 4, or 6 when pointers are 16 bytes wide */
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))   /* the same size, in bytes */
+union LZ4_streamDecode_u {
+    unsigned long long table[LZ4_STREAMDECODESIZE_U64];   /* reserves the static size */
+    LZ4_streamDecode_t_internal internal_donotuse;        /* actual state; private to the library */
+} ;   /* previously typedef'd to LZ4_streamDecode_t */
+
+
+
+/*-************************************
+*  Obsolete Functions
+**************************************/
+
+/*! Deprecation warnings
+ *
+ *  Deprecated functions make the compiler generate a warning when invoked.
+ *  This is meant to invite users to update their source code.
+ *  Should deprecation warnings be a problem, it is generally possible to disable them,
+ *  typically with -Wno-deprecated-declarations for gcc
+ *  or _CRT_SECURE_NO_WARNINGS in Visual Studio.
+ *
+ *  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ *  before including the header file.
+ */
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
+#else
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define LZ4_DEPRECATED(message) [[deprecated(message)]]
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))   /* clang, or gcc >= 4.5 */
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)   /* gcc 3.1 up to 4.4 */
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))   /* attribute form without text: `message` is dropped */
+#  else
+#    pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+#    define LZ4_DEPRECATED(message)   /* disabled */
+#  endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/*! Obsolete compression functions (since v1.7.3) */
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress               (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead")       LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*! Obsolete decompression functions (since v1.8.0) */
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead")  LZ4LIB_API int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead")     LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
+
+/*! Obsolete streaming decoding functions (since v1.7.0) */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
+ *  These functions used to be faster than LZ4_decompress_safe(),
+ *  but this is no longer the case. They are now slower.
+ *  This is because LZ4_decompress_fast() doesn't know the input size,
+ *  and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
+ *  On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
+ *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+ *
+ *  The last remaining LZ4_decompress_fast() specificity is that
+ *  it can decompress a block without knowing its compressed size.
+ *  Such functionality can be achieved in a more secure manner
+ *  by employing LZ4_decompress_safe_partial().
+ *
+ *  Parameters:
+ *  originalSize : is the uncompressed size to regenerate.
+ *                 `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ *           The function expects to finish at block's end exactly.
+ *           If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ *  note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+ *         However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
+ *         Also, since match offsets are not validated, match reads from 'src' may underflow too.
+ *         These issues never happen if input (compressed) data is correct.
+ *         But they may happen if input data is invalid (error or intentional tampering).
+ *         As a consequence, use these functions in trusted environments with trusted data **only**.
+ */
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+
+/*! LZ4_resetStream() :
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is done with LZ4_initStream(), or LZ4_resetStream().
+ *  Consider switching to LZ4_initStream(),
+ *  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+
+#endif /* LZ4_H_98237428734687 */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/include/extern/external/monocypher.c b/include/extern/external/monocypher.c
new file mode 100644
index 0000000..e056db0
--- /dev/null
+++ b/include/extern/external/monocypher.c
@@ -0,0 +1,2961 @@
+// Monocypher version 4.0.1
+//
+// This file is dual-licensed.  Choose whichever licence you want from
+// the two licences listed below.
+//
+// The first licence is a regular 2-clause BSD licence.  The second licence
+// is the CC-0 from Creative Commons. It is intended to release Monocypher
+// to the public domain.  The BSD licence serves as a fallback option.
+//
+// SPDX-License-Identifier: BSD-2-Clause OR CC0-1.0
+//
+// ------------------------------------------------------------------------
+//
+// Copyright (c) 2017-2020, Loup Vaillant
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the
+//    distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ------------------------------------------------------------------------
+//
+// Written in 2017-2020 by Loup Vaillant
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related neighboring rights to this software to the public domain
+// worldwide.  This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this software.  If not, see
+// <https://creativecommons.org/publicdomain/zero/1.0/>
+
+#include "monocypher.h"
+
+#ifdef MONOCYPHER_CPP_NAMESPACE
+namespace MONOCYPHER_CPP_NAMESPACE {
+#endif
+
+/////////////////
+/// Utilities ///
+/////////////////
+#define FOR_T(type, i, start, end) for (type i = (start); i < (end); i++) // loop header only: i runs over [start, end)
+#define FOR(i, start, end)         FOR_T(size_t, i, start, end)           // same, with a size_t index
+#define COPY(dst, src, size)       FOR(_i_, 0, size) (dst)[_i_] = (src)[_i_] // element-wise copy; expands to a bare for statement
+#define ZERO(buf, size)            FOR(_i_, 0, size) (buf)[_i_] = 0          // element-wise zeroing; expands to a bare for statement
+#define WIPE_CTX(ctx)              crypto_wipe(ctx   , sizeof(*(ctx)))       // ctx: pointer to the object to wipe
+#define WIPE_BUFFER(buffer)        crypto_wipe(buffer, sizeof(buffer))       // buffer: a real array (a pointer would give the wrong size)
+#define MIN(a, b)                  ((a) <= (b) ? (a) : (b))                  // one argument is evaluated twice
+#define MAX(a, b)                  ((a) >= (b) ? (a) : (b))                  // one argument is evaluated twice
+
+typedef int8_t   i8;  // short aliases for the fixed-width integer types
+typedef uint8_t  u8;
+typedef int16_t  i16;
+typedef uint32_t u32;
+typedef int32_t  i32;
+typedef int64_t  i64;
+typedef uint64_t u64;
+
+static const u8 zero[128] = {0}; // all-zero bytes; crypto_chacha20_djb() reads them in place of a null plain_text
+
+// Returns the number of bytes y (0 <= y < pow_2) that must be added to x
+// so that (x + y) % pow_2 == 0.  pow_2 must be a power of 2.
+// Negation is written ~x + 1 instead of -x to avoid compiler warnings.
+static size_t align(size_t x, size_t pow_2)
+{
+	const size_t low_bits = pow_2 - 1;
+	const size_t negated  = ~x + 1;
+	return negated & low_bits;
+}
+
+// Reads s[0..2] as an unsigned 24-bit little-endian integer.
+static u32 load24_le(const u8 s[3])
+{
+	const u32 lo = s[0];
+	const u32 hi = ((u32)s[1] << 8) | ((u32)s[2] << 16);
+	return lo | hi;
+}
+
+// Reads s[0..3] as an unsigned 32-bit little-endian integer.
+static u32 load32_le(const u8 s[4])
+{
+	u32 v = (u32)s[3] << 24;
+	v    |= (u32)s[2] << 16;
+	v    |= (u32)s[1] <<  8;
+	return v | (u32)s[0];
+}
+
+static u64 load64_le(const u8 s[8])  // reads s[0..7] as a little-endian u64
+{
+	return ((u64)load32_le(s + 4) << 32) | load32_le(s);  // high word is stored after the low word
+}
+
+// Writes `in` to out[0..3], least significant byte first.
+static void store32_le(u8 out[4], u32 in)
+{
+	FOR (i, 0, 4) {
+		out[i] = (in >> (8 * i)) & 0xff;
+	}
+}
+
+// Writes `in` to out[0..7], least significant byte first.
+static void store64_le(u8 out[8], u64 in)
+{
+	FOR (half, 0, 2) { store32_le(out + 4*half, (u32)(in >> (32*half))); }
+}
+
+static void load32_le_buf (u32 *dst, const u8 *src, size_t size) {
+	for (; size > 0; size--, src += 4) { *dst++ = load32_le(src); } // size counts 32-bit words
+}
+static void load64_le_buf (u64 *dst, const u8 *src, size_t size) {
+	for (; size > 0; size--, src += 8) { *dst++ = load64_le(src); } // size counts 64-bit words
+}
+static void store32_le_buf(u8 *dst, const u32 *src, size_t size) {
+	for (; size > 0; size--, dst += 4) { store32_le(dst, *src++); } // size counts 32-bit words
+}
+static void store64_le_buf(u8 *dst, const u64 *src, size_t size) {
+	for (; size > 0; size--, dst += 8) { store64_le(dst, *src++); } // size counts 64-bit words
+}
+
+static u64 rotr64(u64 x, u64 n) { n &= 63; return (x >> n) | (x << ((64 - n) & 63)); } // rotate x right by n bits; n is taken mod 64, so n == 0 no longer shifts by the full width
+static u32 rotl32(u32 x, u32 n) { n &= 31; return (x << n) | (x >> ((32 - n) & 31)); } // rotate x left by n bits; n is taken mod 32, so n == 0 no longer shifts by the full width
+
+static int neq0(u64 diff)
+{
+	// constant time comparison to zero (no branch on diff):
+	// returns -1 if diff != 0, and 0 if diff == 0
+	u64 half = (diff >> 32) | ((u32)diff); // zero iff diff is zero, and always below 2^32
+	return (1 & ((half - 1) >> 32)) - 1;   // half - 1 has bit 32 set only when half == 0 (wrap-around)
+}
+
+static u64 x16(const u8 a[16], const u8 b[16])
+{
+	const u64 lo = load64_le(a) ^ load64_le(b), hi = load64_le(a + 8) ^ load64_le(b + 8);
+	return lo | hi;  // zero iff all 16 bytes of a and b match
+}
+static u64 x32(const u8 a[32], const u8 b[32]) { return x16(a + 16, b + 16) | x16(a, b); } // zero iff equal
+static u64 x64(const u8 a[64], const u8 b[64]) { return x32(a + 32, b + 32) | x32(a, b); } // zero iff equal
+int crypto_verify16(const u8 a[16], const u8 b[16]) { const u64 diff = x16(a, b); return neq0(diff); } // 0 if equal, -1 if not
+int crypto_verify32(const u8 a[32], const u8 b[32]) { const u64 diff = x32(a, b); return neq0(diff); } // 0 if equal, -1 if not
+int crypto_verify64(const u8 a[64], const u8 b[64]) { const u64 diff = x64(a, b); return neq0(diff); } // 0 if equal, -1 if not
+
+void crypto_wipe(void *secret, size_t size)
+{
+	volatile u8 *v_secret = (u8*)secret; // volatile: each store below is an observable access the compiler must keep
+	ZERO(v_secret, size);                // zeroes the `size` bytes at `secret`, one byte at a time
+}
+
+/////////////////
+/// Chacha 20 ///
+/////////////////
+#define QUARTERROUND(a, b, c, d)	/* 8 statements, no trailing ';'; a..d must be lvalues (read and reassigned) */ \
+	a += b;  d = rotl32(d ^ a, 16); \
+	c += d;  b = rotl32(b ^ c, 12); \
+	a += b;  d = rotl32(d ^ a,  8); \
+	c += d;  b = rotl32(b ^ c,  7)
+
+static void chacha20_rounds(u32 out[16], const u32 in[16]) // out may alias in: every in[] word is read before out[] is written
+{
+	// The temporary variables make Chacha20 10% faster.
+	u32 t0  = in[ 0];  u32 t1  = in[ 1];  u32 t2  = in[ 2];  u32 t3  = in[ 3];
+	u32 t4  = in[ 4];  u32 t5  = in[ 5];  u32 t6  = in[ 6];  u32 t7  = in[ 7];
+	u32 t8  = in[ 8];  u32 t9  = in[ 9];  u32 t10 = in[10];  u32 t11 = in[11];
+	u32 t12 = in[12];  u32 t13 = in[13];  u32 t14 = in[14];  u32 t15 = in[15];
+
+	FOR (i, 0, 10) { // 20 rounds, 2 rounds per loop.
+		QUARTERROUND(t0, t4, t8 , t12); // column 0
+		QUARTERROUND(t1, t5, t9 , t13); // column 1
+		QUARTERROUND(t2, t6, t10, t14); // column 2
+		QUARTERROUND(t3, t7, t11, t15); // column 3
+		QUARTERROUND(t0, t5, t10, t15); // diagonal 0
+		QUARTERROUND(t1, t6, t11, t12); // diagonal 1
+		QUARTERROUND(t2, t7, t8 , t13); // diagonal 2
+		QUARTERROUND(t3, t4, t9 , t14); // diagonal 3
+	}
+	out[ 0] = t0;   out[ 1] = t1;   out[ 2] = t2;   out[ 3] = t3;  // raw round output: in[] is not added back here
+	out[ 4] = t4;   out[ 5] = t5;   out[ 6] = t6;   out[ 7] = t7;
+	out[ 8] = t8;   out[ 9] = t9;   out[10] = t10;  out[11] = t11;
+	out[12] = t12;  out[13] = t13;  out[14] = t14;  out[15] = t15;
+}
+
+static const u8 *chacha20_constant = (const u8*)"expand 32-byte k"; // 16 bytes, loaded as state words 0..3
+
+void crypto_chacha20_h(u8 out[32], const u8 key[32], const u8 in [16])
+{
+	// Input state: 4 constant words, then 8 key words, then the 4 words of `in`.
+	u32 state[16];
+	u32 *const first_row = state;       // words  0.. 3
+	u32 *const last_row  = state + 12;  // words 12..15
+	load32_le_buf(first_row, chacha20_constant, 4);
+	load32_le_buf(state + 4, key              , 8);
+	load32_le_buf(last_row , in               , 4);
+	chacha20_rounds(state, state);
+	// Reveal only half of the state (first and last rows) so the rounds cannot be reversed.
+	store32_le_buf(out, first_row, 4);  store32_le_buf(out + 16, last_row, 4);
+	WIPE_BUFFER(state);
+}
+
+u64 crypto_chacha20_djb(u8 *cipher_text, const u8 *plain_text, // plain_text may be 0: the raw key stream is written
+                        size_t text_size, const u8 key[32], const u8 nonce[8],
+                        u64 ctr) // returns the counter value that follows the last block used
+{
+	u32 input[16];
+	load32_le_buf(input     , chacha20_constant, 4); // words  0.. 3: constant
+	load32_le_buf(input +  4, key              , 8); // words  4..11: key
+	load32_le_buf(input + 14, nonce            , 2); // words 14..15: nonce
+	input[12] = (u32) ctr;                           // word  12    : counter, low half
+	input[13] = (u32)(ctr >> 32);                    // word  13    : counter, high half
+
+	// Whole blocks (64 bytes each)
+	u32    pool[16];
+	size_t nb_blocks = text_size >> 6;
+	FOR (i, 0, nb_blocks) {
+		chacha20_rounds(pool, input);
+		if (plain_text != 0) {
+			FOR (j, 0, 16) {
+				u32 p = pool[j] + input[j]; // key stream word: round output + input state
+				store32_le(cipher_text, p ^ load32_le(plain_text));
+				cipher_text += 4;
+				plain_text  += 4;
+			}
+		} else {
+			FOR (j, 0, 16) {
+				u32 p = pool[j] + input[j]; // no plain text: output the key stream itself
+				store32_le(cipher_text, p);
+				cipher_text += 4;
+			}
+		}
+		input[12]++;            // 64-bit counter increment, split over words 12 and 13
+		if (input[12] == 0) {   // low word wrapped around: carry into the high word
+			input[13]++;
+		}
+	}
+	text_size &= 63; // bytes left over after the whole blocks
+
+	// Last (incomplete) block
+	if (text_size > 0) {
+		if (plain_text == 0) {
+			plain_text = zero; // xor with zeroes below yields the key stream itself
+		}
+		chacha20_rounds(pool, input);
+		u8 tmp[64];
+		FOR (i, 0, 16) {
+			store32_le(tmp + i*4, pool[i] + input[i]);
+		}
+		FOR (i, 0, text_size) {
+			cipher_text[i] = tmp[i] ^ plain_text[i];
+		}
+		WIPE_BUFFER(tmp);
+	}
+	ctr = input[12] + ((u64)input[13] << 32) + (text_size > 0); // the partial block, if any, uses up one more counter value
+
+	WIPE_BUFFER(pool);
+	WIPE_BUFFER(input);
+	return ctr;
+}
+
+u32 crypto_chacha20_ietf(u8 *cipher_text, const u8 *plain_text,
+                         size_t text_size,
+                         const u8 key[32], const u8 nonce[12], u32 ctr)
+{ // 32-bit counter, 12-byte nonce variant, implemented on top of crypto_chacha20_djb()
+	u64 big_ctr = ctr + ((u64)load32_le(nonce) << 32); // first 4 nonce bytes become the high counter word
+	return (u32)crypto_chacha20_djb(cipher_text, plain_text, text_size,
+	                                key, nonce + 4, big_ctr); // only the low 32 bits of the new counter are returned
+}
+
+u64 crypto_chacha20_x(u8 *cipher_text, const u8 *plain_text,
+                      size_t text_size,
+                      const u8 key[32], const u8 nonce[24], u64 ctr)
+{ // 24-byte nonce variant: derive a sub-key, then run crypto_chacha20_djb() with it
+	u8 sub_key[32];
+	crypto_chacha20_h(sub_key, key, nonce); // consumes the first 16 nonce bytes
+	ctr = crypto_chacha20_djb(cipher_text, plain_text, text_size,
+	                          sub_key, nonce + 16, ctr); // last 8 nonce bytes act as the djb nonce
+	WIPE_BUFFER(sub_key);
+	return ctr;
+}
+
+/////////////////
+/// Poly 1305 ///
+/////////////////
+
+// h = (h + c) * r
+// preconditions:
+//   ctx->h <= 4_ffffffff_ffffffff_ffffffff_ffffffff
+//   ctx->r <=   0ffffffc_0ffffffc_0ffffffc_0fffffff
+//   end    <= 1
+// Postcondition:
+//   ctx->h <= 4_ffffffff_ffffffff_ffffffff_ffffffff
+static void poly_block(crypto_poly1305_ctx *ctx, const u8 in[16], unsigned end)
+{ // Absorbs one 16-byte block; `end` is added on top of the block as bit 128.
+	u32 s[4];
+	load32_le_buf(s, in, 4); // the block as four little-endian 32-bit words
+
+	//- PROOF Poly1305
+	//-
+	//- # Inputs & preconditions
+	//- ctx->h[0] = u32()
+	//- ctx->h[1] = u32()
+	//- ctx->h[2] = u32()
+	//- ctx->h[3] = u32()
+	//- ctx->h[4] = u32(limit = 4)
+	//-
+	//- ctx->r[0] = u32(limit = 0x0fffffff)
+	//- ctx->r[1] = u32(limit = 0x0ffffffc)
+	//- ctx->r[2] = u32(limit = 0x0ffffffc)
+	//- ctx->r[3] = u32(limit = 0x0ffffffc)
+	//-
+	//- s[0] = u32()
+	//- s[1] = u32()
+	//- s[2] = u32()
+	//- s[3] = u32()
+	//-
+	//- end = unsigned(limit = 1)
+
+	// s = h + c, without carry propagation
+	const u64 s0 = ctx->h[0] + (u64)s[0]; // s0 <= 1_fffffffe
+	const u64 s1 = ctx->h[1] + (u64)s[1]; // s1 <= 1_fffffffe
+	const u64 s2 = ctx->h[2] + (u64)s[2]; // s2 <= 1_fffffffe
+	const u64 s3 = ctx->h[3] + (u64)s[3]; // s3 <= 1_fffffffe
+	const u32 s4 = ctx->h[4] + end;       // s4 <=          5
+
+	// Copy r into locals, and precompute rrN = (rN >> 2) * 5 for the
+	const u32 r0 = ctx->r[0];       // r0  <= 0fffffff
+	const u32 r1 = ctx->r[1];       // r1  <= 0ffffffc
+	const u32 r2 = ctx->r[2];       // r2  <= 0ffffffc
+	const u32 r3 = ctx->r[3];       // r3  <= 0ffffffc
+	const u32 rr0 = (r0 >> 2) * 5;  // rr0 <= 13fffffb // lose 2 bits...
+	const u32 rr1 = (r1 >> 2) + r1; // rr1 <= 13fffffb // rr1 == (r1 >> 2) * 5
+	const u32 rr2 = (r2 >> 2) + r2; // rr2 <= 13fffffb // rr2 == (r2 >> 2) * 5
+	const u32 rr3 = (r3 >> 2) + r3; // rr3 <= 13fffffb // rr3 == (r3 >> 2) * 5
+
+	// (h + c) * r, without carry propagation
+	const u64 x0 = s0*r0+ s1*rr3+ s2*rr2+ s3*rr1+ s4*rr0; // <= 97ffffe007fffff8
+	const u64 x1 = s0*r1+ s1*r0 + s2*rr3+ s3*rr2+ s4*rr1; // <= 8fffffe20ffffff6
+	const u64 x2 = s0*r2+ s1*r1 + s2*r0 + s3*rr3+ s4*rr2; // <= 87ffffe417fffff4
+	const u64 x3 = s0*r3+ s1*r2 + s2*r1 + s3*r0 + s4*rr3; // <= 7fffffe61ffffff2
+	const u32 x4 = s4 * (r0 & 3); // ...recover 2 bits    // <=                f
+
+	// partial reduction modulo 2^130 - 5
+	const u32 u5 = x4 + (x3 >> 32); // u5 <= 7ffffff5
+	const u64 u0 = (u5 >>  2) * 5 + (x0 & 0xffffffff); // bits above 2^130 fold back in, times 5
+	const u64 u1 = (u0 >> 32)     + (x1 & 0xffffffff) + (x0 >> 32);
+	const u64 u2 = (u1 >> 32)     + (x2 & 0xffffffff) + (x1 >> 32);
+	const u64 u3 = (u2 >> 32)     + (x3 & 0xffffffff) + (x2 >> 32);
+	const u64 u4 = (u3 >> 32)     + (u5 & 3); // low 2 bits of u5 stay as bits 128-129
+
+	// Update the hash
+	ctx->h[0] = u0 & 0xffffffff; // u0 <= 1_9ffffff0
+	ctx->h[1] = u1 & 0xffffffff; // u1 <= 1_97ffffe0
+	ctx->h[2] = u2 & 0xffffffff; // u2 <= 1_8fffffe2
+	ctx->h[3] = u3 & 0xffffffff; // u3 <= 1_87ffffe4
+	ctx->h[4] = u4 & 0xffffffff; // u4 <=          4
+
+	//- # postconditions
+	//- ASSERT(ctx->h[4].limit() <= 4)
+	//- CQFD Poly1305
+}
+
+void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const u8 key[32])
+{ // Starts an incremental MAC: key[0..15] becomes r, key[16..31] becomes pad.
+	ZERO(ctx->h, 5); // Initial hash is zero
+	ctx->c_idx = 0;  // no buffered input bytes yet
+	// load r and pad (r has some of its bits cleared)
+	load32_le_buf(ctx->r  , key   , 4);
+	load32_le_buf(ctx->pad, key+16, 4);
+	FOR (i, 0, 1) { ctx->r[i] &= 0x0fffffff; } // r[0]: clear the top 4 bits
+	FOR (i, 1, 4) { ctx->r[i] &= 0x0ffffffc; } // r[1..3]: clear the top 4 and bottom 2 bits
+}
+
+void crypto_poly1305_update(crypto_poly1305_ctx *ctx,
+                            const u8 *message, size_t message_size)
+{ // Absorbs message_size bytes, buffering any incomplete 16-byte block in ctx->c.
+	// Align ourselves with block boundaries
+	size_t aligned = MIN(align(ctx->c_idx, 16), message_size); // align() is defined elsewhere; presumably the bytes missing to reach a multiple of 16
+	FOR (i, 0, aligned) {
+		ctx->c[ctx->c_idx] = *message;
+		ctx->c_idx++;
+		message++;
+		message_size--;
+	}
+
+	// If block is complete, process it
+	if (ctx->c_idx == 16) {
+		poly_block(ctx, ctx->c, 1); // end = 1: full block
+		ctx->c_idx = 0;
+	}
+
+	// Process the message block by block
+	size_t nb_blocks = message_size >> 4; // 16 bytes per block
+	FOR (i, 0, nb_blocks) {
+		poly_block(ctx, message, 1); // straight from the caller's buffer, no copy
+		message += 16;
+	}
+	message_size &= 15;
+
+	// remaining bytes (we never complete a block here)
+	FOR (i, 0, message_size) {
+		ctx->c[ctx->c_idx] = message[i];
+		ctx->c_idx++;
+	}
+}
+
+void crypto_poly1305_final(crypto_poly1305_ctx *ctx, u8 mac[16])
+{ // Finishes the MAC: absorbs the buffered tail, reduces h, adds pad, writes 16 bytes.
+	// Process the last block (if any)
+	// We move the final 1 according to remaining input length
+	// (this will add less than 2^130 to the last input block)
+	if (ctx->c_idx != 0) {
+		ZERO(ctx->c + ctx->c_idx, 16 - ctx->c_idx); // zero-pad the tail of the buffer
+		ctx->c[ctx->c_idx] = 1;                     // the final 1, right after the last input byte
+		poly_block(ctx, ctx->c, 0);                 // end = 0: the 1 is already inside the block
+	}
+
+	// check if we should subtract 2^130-5 by performing the
+	// corresponding carry propagation.
+	u64 c = 5;
+	FOR (i, 0, 4) {
+		c  += ctx->h[i];
+		c >>= 32; // keep only the carry out of each 32-bit limb
+	}
+	c += ctx->h[4];
+	c  = (c >> 2) * 5; // shift the carry back to the beginning
+	// c now indicates how many times we should subtract 2^130-5 (0 or 1)
+	FOR (i, 0, 4) {
+		c += (u64)ctx->h[i] + ctx->pad[i];
+		store32_le(mac + i*4, (u32)c); // only the low 128 bits are output
+		c = c >> 32;
+	}
+	WIPE_CTX(ctx); // the context is not reusable after this call
+}
+
+void crypto_poly1305(u8     mac[16],  const u8 *message,
+                     size_t message_size, const u8  key[32])
+{ // One-shot MAC: init, a single update over the whole message, then final.
+	crypto_poly1305_ctx ctx;
+	crypto_poly1305_init  (&ctx, key);
+	crypto_poly1305_update(&ctx, message, message_size);
+	crypto_poly1305_final (&ctx, mac); // also wipes ctx
+}
+
+////////////////
+/// BLAKE2 b ///
+////////////////
+static const u64 iv[8] = { // initial hash words; also seeds the upper half of the work vector
+	0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
+	0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
+	0x510e527fade682d1, 0x9b05688c2b3e6c1f,
+	0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
+};
+
+static void blake2b_compress(crypto_blake2b_ctx *ctx, int is_last_block)
+{ // Mixes the 128-byte ctx->input block into ctx->hash; is_last_block must be 0 or 1.
+	static const u8 sigma[12][16] = { // message word order per round; rows 10-11 repeat rows 0-1
+		{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+		{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+		{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
+		{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
+		{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
+		{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
+		{ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
+		{ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
+		{  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
+		{ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
+		{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+		{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+	};
+
+	// increment input offset (a 128-bit byte counter held in two u64 words)
+	u64   *x = ctx->input_offset;
+	size_t y = ctx->input_idx; // bytes present in the block being compressed
+	x[0] += y;
+	if (x[0] < y) { // unsigned wrap-around: carry into the high word
+		x[1]++;
+	}
+
+	// init work vector
+	u64 v0 = ctx->hash[0];  u64 v8  = iv[0];
+	u64 v1 = ctx->hash[1];  u64 v9  = iv[1];
+	u64 v2 = ctx->hash[2];  u64 v10 = iv[2];
+	u64 v3 = ctx->hash[3];  u64 v11 = iv[3];
+	u64 v4 = ctx->hash[4];  u64 v12 = iv[4] ^ ctx->input_offset[0];
+	u64 v5 = ctx->hash[5];  u64 v13 = iv[5] ^ ctx->input_offset[1];
+	u64 v6 = ctx->hash[6];  u64 v14 = iv[6] ^ (u64)~(is_last_block - 1); // XOR mask: all ones if is_last_block == 1, zero if 0
+	u64 v7 = ctx->hash[7];  u64 v15 = iv[7];
+
+	// mangle work vector
+	u64 *input = ctx->input;
+#define BLAKE2_G(a, b, c, d, x, y)	\
+	a += b + x;  d = rotr64(d ^ a, 32); \
+	c += d;      b = rotr64(b ^ c, 24); \
+	a += b + y;  d = rotr64(d ^ a, 16); \
+	c += d;      b = rotr64(b ^ c, 63)
+#define BLAKE2_ROUND(i)	\
+	BLAKE2_G(v0, v4, v8 , v12, input[sigma[i][ 0]], input[sigma[i][ 1]]); \
+	BLAKE2_G(v1, v5, v9 , v13, input[sigma[i][ 2]], input[sigma[i][ 3]]); \
+	BLAKE2_G(v2, v6, v10, v14, input[sigma[i][ 4]], input[sigma[i][ 5]]); \
+	BLAKE2_G(v3, v7, v11, v15, input[sigma[i][ 6]], input[sigma[i][ 7]]); \
+	BLAKE2_G(v0, v5, v10, v15, input[sigma[i][ 8]], input[sigma[i][ 9]]); \
+	BLAKE2_G(v1, v6, v11, v12, input[sigma[i][10]], input[sigma[i][11]]); \
+	BLAKE2_G(v2, v7, v8 , v13, input[sigma[i][12]], input[sigma[i][13]]); \
+	BLAKE2_G(v3, v4, v9 , v14, input[sigma[i][14]], input[sigma[i][15]])
+
+#ifdef BLAKE2_NO_UNROLLING
+	FOR (i, 0, 12) { // same 12 rounds as below, as a loop
+		BLAKE2_ROUND(i);
+	}
+#else
+	BLAKE2_ROUND(0);  BLAKE2_ROUND(1);  BLAKE2_ROUND(2);  BLAKE2_ROUND(3);
+	BLAKE2_ROUND(4);  BLAKE2_ROUND(5);  BLAKE2_ROUND(6);  BLAKE2_ROUND(7);
+	BLAKE2_ROUND(8);  BLAKE2_ROUND(9);  BLAKE2_ROUND(10); BLAKE2_ROUND(11);
+#endif
+
+	// update hash: fold both halves of the work vector into it
+	ctx->hash[0] ^= v0 ^ v8;   ctx->hash[1] ^= v1 ^ v9;
+	ctx->hash[2] ^= v2 ^ v10;  ctx->hash[3] ^= v3 ^ v11;
+	ctx->hash[4] ^= v4 ^ v12;  ctx->hash[5] ^= v5 ^ v13;
+	ctx->hash[6] ^= v6 ^ v14;  ctx->hash[7] ^= v7 ^ v15;
+}
+
+void crypto_blake2b_keyed_init(crypto_blake2b_ctx *ctx, size_t hash_size,
+                               const u8 *key, size_t key_size)
+{ // Starts an incremental hash; a non-empty key is queued as the first input block.
+	// initial hash
+	COPY(ctx->hash, iv, 8);
+	ctx->hash[0] ^= 0x01010000 ^ (key_size << 8) ^ hash_size; // key size lands in byte 1, hash size in byte 0
+
+	ctx->input_offset[0] = 0;  // beginning of the input, no offset
+	ctx->input_offset[1] = 0;  // beginning of the input, no offset
+	ctx->hash_size       = hash_size;
+	ctx->input_idx       = 0;
+	ZERO(ctx->input, 16);
+
+	// if there is a key, the first block is that key (padded with zeroes)
+	if (key_size > 0) {
+		u8 key_block[128] = {0}; // NOTE(review): this copy of the key is not wiped before leaving scope
+		COPY(key_block, key, key_size); // NOTE(review): key_size is not range-checked here; above 128 it would overrun key_block
+		// same as calling crypto_blake2b_update(ctx, key_block , 128)
+		load64_le_buf(ctx->input, key_block, 16);
+		ctx->input_idx = 128; // block is full; it is compressed later, by update or final
+	}
+}
+
+void crypto_blake2b_init(crypto_blake2b_ctx *ctx, size_t hash_size)
+{ // Unkeyed initialisation: forwards to the keyed version with no key.
+	crypto_blake2b_keyed_init(ctx, hash_size, 0, 0);
+}
+
+void crypto_blake2b_update(crypto_blake2b_ctx *ctx,
+                           const u8 *message, size_t message_size)
+{ // Absorbs message_size bytes; a full block stays buffered until more input (or final) arrives.
+	// Avoid undefined NULL pointer increments with empty messages
+	if (message_size == 0) {
+		return;
+	}
+
+	// Align with word boundaries
+	if ((ctx->input_idx & 7) != 0) {
+		size_t nb_bytes = MIN(align(ctx->input_idx, 8), message_size); // align() is defined elsewhere; presumably bytes missing to the next multiple of 8
+		size_t word     = ctx->input_idx >> 3; // index of the partially filled word
+		size_t byte     = ctx->input_idx & 7;  // bytes already present in that word
+		FOR (i, 0, nb_bytes) {
+			ctx->input[word] |= (u64)message[i] << ((byte + i) << 3); // little-endian placement; relies on unused bytes being zero
+		}
+		ctx->input_idx += nb_bytes;
+		message        += nb_bytes;
+		message_size   -= nb_bytes;
+	}
+
+	// Align with block boundaries (faster than byte by byte)
+	if ((ctx->input_idx & 127) != 0) {
+		size_t nb_words = MIN(align(ctx->input_idx, 128), message_size) >> 3; // whole 8-byte words only
+		load64_le_buf(ctx->input + (ctx->input_idx >> 3), message, nb_words);
+		ctx->input_idx += nb_words << 3;
+		message        += nb_words << 3;
+		message_size   -= nb_words << 3;
+	}
+
+	// Process block by block
+	size_t nb_blocks = message_size >> 7; // 128 bytes per block
+	FOR (i, 0, nb_blocks) {
+		if (ctx->input_idx == 128) { // flush the previously buffered full block first
+			blake2b_compress(ctx, 0);
+		}
+		load64_le_buf(ctx->input, message, 16);
+		message += 128;
+		ctx->input_idx = 128; // newest block stays buffered, uncompressed
+	}
+	message_size &= 127;
+
+	if (message_size != 0) {
+		// Compress block & flush input buffer as needed
+		if (ctx->input_idx == 128) {
+			blake2b_compress(ctx, 0);
+			ctx->input_idx = 0;
+		}
+		if (ctx->input_idx == 0) {
+			ZERO(ctx->input, 16); // the byte-wise |= below needs a zeroed buffer
+		}
+		// Fill remaining words (faster than byte by byte)
+		size_t nb_words = message_size >> 3;
+		load64_le_buf(ctx->input, message, nb_words); // NOTE(review): writes at offset 0, so this appears to rely on input_idx == 0 whenever nb_words > 0
+		ctx->input_idx += nb_words << 3;
+		message        += nb_words << 3;
+		message_size   -= nb_words << 3;
+
+		// Fill remaining bytes
+		FOR (i, 0, message_size) {
+			size_t word = ctx->input_idx >> 3;
+			size_t byte = ctx->input_idx & 7;
+			ctx->input[word] |= (u64)message[i] << (byte << 3);
+			ctx->input_idx++;
+		}
+	}
+}
+
+void crypto_blake2b_final(crypto_blake2b_ctx *ctx, u8 *hash)
+{ // Compresses the buffered block as the last one and writes the digest to hash.
+	blake2b_compress(ctx, 1); // compress the last block
+	size_t hash_size = MIN(ctx->hash_size, 64); // never output more than the 64-byte state
+	size_t nb_words  = hash_size >> 3;
+	store64_le_buf(hash, ctx->hash, nb_words); // whole 8-byte words first
+	FOR (i, nb_words << 3, hash_size) { // then the trailing bytes, if hash_size is not a multiple of 8
+		hash[i] = (ctx->hash[i >> 3] >> (8 * (i & 7))) & 0xff;
+	}
+	WIPE_CTX(ctx); // the context is not reusable after this call
+}
+
+void crypto_blake2b_keyed(u8 *hash,          size_t hash_size,
+                          const u8 *key,     size_t key_size,
+                          const u8 *message, size_t message_size)
+{ // One-shot keyed hash: init, a single update over the whole message, then final.
+	crypto_blake2b_ctx ctx;
+	crypto_blake2b_keyed_init(&ctx, hash_size, key, key_size);
+	crypto_blake2b_update    (&ctx, message, message_size);
+	crypto_blake2b_final     (&ctx, hash); // also wipes ctx
+}
+
+void crypto_blake2b(u8 *hash, size_t hash_size, const u8 *msg, size_t msg_size)
+{ // One-shot unkeyed hash: forwards to crypto_blake2b_keyed() with no key.
+	crypto_blake2b_keyed(hash, hash_size, 0, 0, msg, msg_size);
+}
+
+//////////////
+/// Argon2 ///
+//////////////
+// references to R, Z, Q etc. come from the spec
+
+// Argon2 operates on 1024 byte blocks.
+typedef struct { u64 a[128]; } blk; // 128 words * 8 bytes = 1024 bytes
+
+// updates a BLAKE2 hash with a 32 bit word, little endian.
+static void blake_update_32(crypto_blake2b_ctx *ctx, u32 input)
+{ // Feeds `input` to the hash as 4 little-endian bytes.
+	u8 buf[4];
+	store32_le(buf, input);
+	crypto_blake2b_update(ctx, buf, 4);
+	WIPE_BUFFER(buf);
+}
+
+static void blake_update_32_buf(crypto_blake2b_ctx *ctx,
+                                const u8 *buf, u32 size)
+{ // Feeds a length-prefixed buffer to the hash: 32-bit size first, then the bytes.
+	blake_update_32(ctx, size);
+	crypto_blake2b_update(ctx, buf, size); // returns immediately when size == 0
+}
+
+
+static void copy_block(blk *o,const blk*in){FOR(i, 0, 128) o->a[i]  = in->a[i];} // *o  = *in, word by word
+static void  xor_block(blk *o,const blk*in){FOR(i, 0, 128) o->a[i] ^= in->a[i];} // *o ^= *in, word by word
+
+// Hash with a virtually unlimited digest size.
+// Doesn't extract more entropy than the base hash function.
+// Mainly used for filling a whole kilobyte block with pseudo-random bytes.
+// (One could use a stream cipher with a seed hash as the key, but
+//  this would introduce another dependency —and point of failure.)
+static void extended_hash(u8       *digest, u32 digest_size,
+                          const u8 *input , u32 input_size)
+{ // Writes digest_size bytes derived from input; sizes above 64 are built by chained hashing.
+	crypto_blake2b_ctx ctx;
+	crypto_blake2b_init  (&ctx, MIN(digest_size, 64));
+	blake_update_32      (&ctx, digest_size); // the requested size is hashed before the input
+	crypto_blake2b_update(&ctx, input, input_size);
+	crypto_blake2b_final (&ctx, digest); // first (at most) 64 bytes of output
+
+	if (digest_size > 64) {
+		// the conversion to u64 avoids integer overflow on
+		// ludicrously big hash sizes.
+		u32 r   = (u32)(((u64)digest_size + 31) >> 5) - 2; // ceil(digest_size / 32) - 2
+		u32 i   =  1;
+		u32 in  =  0; // offset of the 64 bytes hashed next
+		u32 out = 32; // offset where the next hash is written
+		while (i < r) {
+			// Input and output overlap. This is intentional
+			crypto_blake2b(digest + out, 64, digest + in, 64); // each step keeps 32 new bytes; the next step overwrites the rest
+			i   +=  1;
+			in  += 32;
+			out += 32;
+		}
+		crypto_blake2b(digest + out, digest_size - (32 * r), digest + in , 64); // last hash fills whatever remains
+	}
+}
+
+#define LSB(x) ((x) & 0xffffffff) // low 32 bits of x
+#define G(a, b, c, d)	/* mixes four words; every addition also adds 2 * LSB * LSB */ \
+	a += b + 2 * LSB(a) * LSB(b);  d ^= a;  d = rotr64(d, 32); \
+	c += d + 2 * LSB(c) * LSB(d);  b ^= c;  b = rotr64(b, 24); \
+	a += b + 2 * LSB(a) * LSB(b);  d ^= a;  d = rotr64(d, 16); \
+	c += d + 2 * LSB(c) * LSB(d);  b ^= c;  b = rotr64(b, 63)
+#define ROUND(v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7,	\
+              v8,  v9, v10, v11, v12, v13, v14, v15)	\
+	G(v0, v4,  v8, v12);  G(v1, v5,  v9, v13); \
+	G(v2, v6, v10, v14);  G(v3, v7, v11, v15); \
+	G(v0, v5, v10, v15);  G(v1, v6, v11, v12); \
+	G(v2, v7,  v8, v13);  G(v3, v4,  v9, v14) // four column mixes, then four diagonal mixes
+
+// Core of the compression function G.  Computes Z from R in place.
+static void g_rounds(blk *b)
+{ // Applies ROUND to eight groups of 16 consecutive words, then to eight interleaved groups.
+	// column rounds (work_block = Q)
+	for (int i = 0; i < 128; i += 16) { // words i .. i+15
+		ROUND(b->a[i   ], b->a[i+ 1], b->a[i+ 2], b->a[i+ 3],
+		      b->a[i+ 4], b->a[i+ 5], b->a[i+ 6], b->a[i+ 7],
+		      b->a[i+ 8], b->a[i+ 9], b->a[i+10], b->a[i+11],
+		      b->a[i+12], b->a[i+13], b->a[i+14], b->a[i+15]);
+	}
+	// row rounds (b = Z)
+	for (int i = 0; i < 16; i += 2) { // word pairs (i, i+1) taken at a stride of 16 words
+		ROUND(b->a[i   ], b->a[i+ 1], b->a[i+ 16], b->a[i+ 17],
+		      b->a[i+32], b->a[i+33], b->a[i+ 48], b->a[i+ 49],
+		      b->a[i+64], b->a[i+65], b->a[i+ 80], b->a[i+ 81],
+		      b->a[i+96], b->a[i+97], b->a[i+112], b->a[i+113]);
+	}
+}
+
+const crypto_argon2_extras crypto_argon2_no_extras = { 0, 0, 0, 0 }; // all extras zeroed: no key, no additional data
+
+void crypto_argon2(u8 *hash, u32 hash_size, void *work_area,
+                   crypto_argon2_config config,
+                   crypto_argon2_inputs inputs,
+                   crypto_argon2_extras extras)
+{ // Fills work_area with config.nb_passes passes, then hashes the result into hash_size bytes.
+	const u32 segment_size = config.nb_blocks / config.nb_lanes / 4; // 4 slices per lane
+	const u32 lane_size    = segment_size * 4;
+	const u32 nb_blocks    = lane_size * config.nb_lanes; // rounding down
+
+	// work area seen as blocks (must be suitably aligned)
+	blk *blocks = (blk*)work_area;
+	{
+		u8 initial_hash[72]; // 64 bytes plus 2 words for future hashes
+		crypto_blake2b_ctx ctx;
+		crypto_blake2b_init (&ctx, 64);
+		blake_update_32     (&ctx, config.nb_lanes ); // p: number of "threads"
+		blake_update_32     (&ctx, hash_size);
+		blake_update_32     (&ctx, config.nb_blocks); // the value as given, not the rounded-down nb_blocks
+		blake_update_32     (&ctx, config.nb_passes);
+		blake_update_32     (&ctx, 0x13);             // v: version number
+		blake_update_32     (&ctx, config.algorithm); // y: Argon2i, Argon2d...
+		blake_update_32_buf (&ctx, inputs.pass, inputs.pass_size);
+		blake_update_32_buf (&ctx, inputs.salt, inputs.salt_size);
+		blake_update_32_buf (&ctx, extras.key,  extras.key_size);
+		blake_update_32_buf (&ctx, extras.ad,   extras.ad_size);
+		crypto_blake2b_final(&ctx, initial_hash); // fill 64 first bytes only
+
+		// fill first 2 blocks of each lane
+		u8 hash_area[1024];
+		FOR_T(u32, l, 0, config.nb_lanes) {
+			FOR_T(u32, i, 0, 2) {
+				store32_le(initial_hash + 64, i); // first  additional word
+				store32_le(initial_hash + 68, l); // second additional word
+				extended_hash(hash_area, 1024, initial_hash, 72);
+				load64_le_buf(blocks[l * lane_size + i].a, hash_area, 128); // bytes to 128 little-endian words
+			}
+		}
+
+		WIPE_BUFFER(initial_hash);
+		WIPE_BUFFER(hash_area);
+	}
+
+	// Argon2i and Argon2id start with constant time indexing
+	int constant_time = config.algorithm != CRYPTO_ARGON2_D;
+
+	// Fill (and re-fill) the rest of the blocks
+	//
+	// Note: even though each segment within the same slice can be
+	// computed in parallel, (one thread per lane), we are computing
+	// them sequentially, because Monocypher doesn't support threads.
+	//
+	// Yet optimal performance (and therefore security) requires one
+	// thread per lane. The only reason Monocypher supports multiple
+	// lanes is compatibility.
+	blk tmp;
+	FOR_T(u32, pass, 0, config.nb_passes) {
+		FOR_T(u32, slice, 0, 4) {
+			// On the first slice of the first pass,
+			// blocks 0 and 1 are already filled, hence pass_offset.
+			u32 pass_offset  = pass == 0 && slice == 0 ? 2 : 0;
+			u32 slice_offset = slice * segment_size;
+
+			// Argon2id switches back to non-constant time indexing
+			// after the first two slices of the first pass
+			if (slice == 2 && config.algorithm == CRYPTO_ARGON2_ID) {
+				constant_time = 0; // never set back, so it stays off for all later slices and passes
+			}
+
+			// Each iteration of the following loop may be performed in
+			// a separate thread.  All segments must be fully completed
+			// before we start filling the next slice.
+			FOR_T(u32, segment, 0, config.nb_lanes) {
+				blk index_block;
+				u32 index_ctr = 1; // restarts at 1 for every segment
+				FOR_T (u32, block, pass_offset, segment_size) {
+					// Current and previous blocks
+					u32  lane_offset   = segment * lane_size;
+					blk *segment_start = blocks + lane_offset + slice_offset;
+					blk *current       = segment_start + block;
+					blk *previous      =
+						block == 0 && slice_offset == 0
+						? segment_start + lane_size - 1 // first block of the lane: wrap to the lane's last block
+						: segment_start + block - 1;
+
+					u64 index_seed;
+					if (constant_time) {
+						if (block == pass_offset || (block % 128) == 0) {
+							// Fill or refresh deterministic indices block
+
+							// seed the beginning of the block...
+							ZERO(index_block.a, 128);
+							index_block.a[0] = pass;
+							index_block.a[1] = segment;
+							index_block.a[2] = slice;
+							index_block.a[3] = nb_blocks;
+							index_block.a[4] = config.nb_passes;
+							index_block.a[5] = config.algorithm;
+							index_block.a[6] = index_ctr;
+							index_ctr++;
+
+							// ... then shuffle it (two rounds of: x = g_rounds(x) ^ x)
+							copy_block(&tmp, &index_block);
+							g_rounds  (&index_block);
+							xor_block (&index_block, &tmp);
+							copy_block(&tmp, &index_block);
+							g_rounds  (&index_block);
+							xor_block (&index_block, &tmp);
+						}
+						index_seed = index_block.a[block % 128]; // one 64-bit seed per block, 128 seeds per refresh
+					} else {
+						index_seed = previous->a[0]; // data-dependent: taken from the previous block's contents
+					}
+
+					// Establish the reference set.  *Approximately* comprises:
+					// - The last 3 slices (if they exist yet)
+					// - The already constructed blocks in the current segment
+					u32 next_slice   = ((slice + 1) % 4) * segment_size;
+					u32 window_start = pass == 0 ? 0     : next_slice;
+					u32 nb_segments  = pass == 0 ? slice : 3;
+					u32 window_size  = nb_segments * segment_size + block - 1;
+
+					// Find reference block
+					u64  j1        = index_seed & 0xffffffff; // block selector
+					u64  j2        = index_seed >> 32;        // lane selector
+					u64  x         = (j1 * j1)         >> 32;
+					u64  y         = (window_size * x) >> 32;
+					u64  z         = (window_size - 1) - y; // larger j1 gives a smaller z
+					u64  ref       = (window_start + z) % lane_size;
+					u32  index     = (j2%config.nb_lanes)*lane_size + (u32)ref;
+					blk *reference = blocks + index;
+
+					// Shuffle the previous & reference block
+					// into the current block
+					copy_block(&tmp, previous);
+					xor_block (&tmp, reference);
+					if (pass == 0) { copy_block(current, &tmp); } // first pass: overwrite
+					else           { xor_block (current, &tmp); } // later passes: XOR into the old contents
+					g_rounds  (&tmp);
+					xor_block (current, &tmp);
+				}
+			}
+		}
+	}
+
+	// Wipe temporary block
+	volatile u64* p = tmp.a; // volatile pointer, so the zeroing stores are not optimised away
+	ZERO(p, 128);
+
+	// XOR last blocks of each lane
+	blk *last_block = blocks + lane_size - 1;
+	FOR_T (u32, lane, 1, config.nb_lanes) {
+		blk *next_block = last_block + lane_size;
+		xor_block(next_block, last_block); // running XOR ends up in the last lane's last block
+		last_block = next_block;
+	}
+
+	// Serialize last block
+	u8 final_block[1024];
+	store64_le_buf(final_block, last_block->a, 128);
+
+	// Wipe work area
+	p = (u64*)work_area;
+	ZERO(p, 128 * nb_blocks); // 128 words per block
+
+	// Hash the very last block with H' into the output hash
+	extended_hash(hash, hash_size, final_block, 1024);
+	WIPE_BUFFER(final_block);
+}
+
+////////////////////////////////////
+/// Arithmetic modulo 2^255 - 19 ///
+////////////////////////////////////
+//  Originally taken from SUPERCOP's ref10 implementation.
+//  A bit bigger than TweetNaCl, over 4 times faster.
+
+// field element: 10 signed limbs, alternately carried at 26 and 25 bits
+typedef i32 fe[10];
+
+// field constants
+//
+// fe_one      : 1
+// sqrtm1      : sqrt(-1)
+// d           :     -121665 / 121666
+// D2          : 2 * -121665 / 121666
+// lop_x, lop_y: low order point in Edwards coordinates
+// ufactor     : -sqrt(-1) * 2
+// A2          : 486662^2  (A squared)
+static const fe fe_one  = {1}; // 1; limbs 1-9 are implicitly zero
+static const fe sqrtm1  = { // sqrt(-1)
+	-32595792, -7943725, 9377950, 3500415, 12389472,
+	-272473, -25146209, -2005654, 326686, 11406482,
+};
+static const fe d       = { // -121665 / 121666
+	-10913610, 13857413, -15372611, 6949391, 114729,
+	-8787816, -6275908, -3247719, -18696448, -12055116,
+};
+static const fe D2      = { // 2 * -121665 / 121666
+	-21827239, -5839606, -30745221, 13898782, 229458,
+	15978800, -12551817, -6495438, 29715968, 9444199,
+};
+static const fe lop_x   = { // low order point, x coordinate
+	21352778, 5345713, 4660180, -8347857, 24143090,
+	14568123, 30185756, -12247770, -33528939, 8345319,
+};
+static const fe lop_y   = { // low order point, y coordinate
+	-6952922, -1265500, 6862341, -7057498, -4037696,
+	-5447722, 31680899, -15325402, -19365852, 1569102,
+};
+static const fe ufactor = { // -sqrt(-1) * 2
+	-1917299, 15887451, -18755900, -7000830, -24778944,
+	544946, -16816446, 4011309, -653372, 10741468,
+};
+static const fe A2      = { // 486662^2
+	12721188, 3529, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static void fe_0(fe h) {           ZERO(h  , 10); } // h = 0
+static void fe_1(fe h) { h[0] = 1; ZERO(h+1,  9); } // h = 1
+
+static void fe_copy(fe h,const fe f           ){FOR(i,0,10) h[i] =  f[i];      } // h = f
+static void fe_neg (fe h,const fe f           ){FOR(i,0,10) h[i] = -f[i];      } // h = -f, limb by limb
+static void fe_add (fe h,const fe f,const fe g){FOR(i,0,10) h[i] = f[i] + g[i];} // h = f + g, no carry propagation
+static void fe_sub (fe h,const fe f,const fe g){FOR(i,0,10) h[i] = f[i] - g[i];} // h = f - g, no carry propagation
+
+static void fe_cswap(fe f, fe g, int b)
+{ // Swaps f and g when b == 1, leaves them untouched when b == 0; no branch on b.
+	i32 mask = -b; // -1 = 0xffffffff
+	FOR (i, 0, 10) {
+		i32 x = (f[i] ^ g[i]) & mask; // f ^ g when swapping, 0 otherwise
+		f[i] = f[i] ^ x;
+		g[i] = g[i] ^ x;
+	}
+}
+
+static void fe_ccopy(fe f, const fe g, int b)
+{ // Copies g into f when b == 1, leaves f untouched when b == 0; no branch on b.
+	i32 mask = -b; // -1 = 0xffffffff
+	FOR (i, 0, 10) {
+		i32 x = (f[i] ^ g[i]) & mask; // f ^ g when copying, 0 otherwise
+		f[i] = f[i] ^ x;
+	}
+}
+
+
+// Signed carry propagation
+// ------------------------
+//
+// Let t be a number.  It can be uniquely decomposed thus:
+//
+//    t = h*2^26 + l
+//    such that -2^25 <= l < 2^25
+//
+// Let c = (t + 2^25) / 2^26            (rounded down)
+//     c = (h*2^26 + l + 2^25) / 2^26   (rounded down)
+//     c =  h   +   (l + 2^25) / 2^26   (rounded down)
+//     c =  h                           (exactly)
+// Because 0 <= l + 2^25 < 2^26
+//
+// Let u = t          - c*2^26
+//     u = h*2^26 + l - h*2^26
+//     u = l
+// Therefore, -2^25 <= u < 2^25
+//
+// Additionally, if |t| < x, then |h| < x/2^26 (rounded down)
+//
+// Notations:
+// - In C, 1<<25 means 2^25.
+// - In C, x>>25 means floor(x / (2^25)).
+// - All of the above applies with 25 & 24 as well as 26 & 25.
+//
+//
+// Note on negative right shifts
+// -----------------------------
+//
+// In C, x >> n, where x is a negative integer, is implementation
+// defined.  In practice, all platforms do arithmetic shift, which is
+// equivalent to division by 2^n, rounded down.  Some compilers, like
+// GCC, even guarantee it.
+//
+// If we ever stumble upon a platform that does not propagate the sign
+// bit (we won't), visible failures will show at the slightest test, and
+// the signed shifts can be replaced by the following:
+//
+//     typedef struct { i64 x:39; } s25;
+//     typedef struct { i64 x:38; } s26;
+//     i64 shift25(i64 x) { s25 s; s.x = ((u64)x)>>25; return s.x; }
+//     i64 shift26(i64 x) { s26 s; s.x = ((u64)x)>>26; return s.x; }
+//
+// Current compilers cannot optimise this, causing a 30% drop in
+// performance.  Fairly expensive for something that never happens.
+//
+//
+// Precondition
+// ------------
+//
+// |t0|       < 2^63
+// |t1|..|t9| < 2^62
+//
+// Algorithm
+// ---------
+// c   = t0 + 2^25 / 2^26   -- |c|  <= 2^36
+// t0 -= c * 2^26           -- |t0| <= 2^25
+// t1 += c                  -- |t1| <= 2^63
+//
+// c   = t4 + 2^25 / 2^26   -- |c|  <= 2^36
+// t4 -= c * 2^26           -- |t4| <= 2^25
+// t5 += c                  -- |t5| <= 2^63
+//
+// c   = t1 + 2^24 / 2^25   -- |c|  <= 2^38
+// t1 -= c * 2^25           -- |t1| <= 2^24
+// t2 += c                  -- |t2| <= 2^63
+//
+// c   = t5 + 2^24 / 2^25   -- |c|  <= 2^38
+// t5 -= c * 2^25           -- |t5| <= 2^24
+// t6 += c                  -- |t6| <= 2^63
+//
+// c   = t2 + 2^25 / 2^26   -- |c|  <= 2^37
+// t2 -= c * 2^26           -- |t2| <= 2^25        < 1.1 * 2^25  (final t2)
+// t3 += c                  -- |t3| <= 2^63
+//
+// c   = t6 + 2^25 / 2^26   -- |c|  <= 2^37
+// t6 -= c * 2^26           -- |t6| <= 2^25        < 1.1 * 2^25  (final t6)
+// t7 += c                  -- |t7| <= 2^63
+//
+// c   = t3 + 2^24 / 2^25   -- |c|  <= 2^38
+// t3 -= c * 2^25           -- |t3| <= 2^24        < 1.1 * 2^24  (final t3)
+// t4 += c                  -- |t4| <= 2^25 + 2^38 < 2^39
+//
+// c   = t7 + 2^24 / 2^25   -- |c|  <= 2^38
+// t7 -= c * 2^25           -- |t7| <= 2^24        < 1.1 * 2^24  (final t7)
+// t8 += c                  -- |t8| <= 2^63
+//
+// c   = t4 + 2^25 / 2^26   -- |c|  <= 2^13
+// t4 -= c * 2^26           -- |t4| <= 2^25        < 1.1 * 2^25  (final t4)
+// t5 += c                  -- |t5| <= 2^24 + 2^13 < 1.1 * 2^24  (final t5)
+//
+// c   = t8 + 2^25 / 2^26   -- |c|  <= 2^37
+// t8 -= c * 2^26           -- |t8| <= 2^25        < 1.1 * 2^25  (final t8)
+// t9 += c                  -- |t9| <= 2^63
+//
+// c   = t9 + 2^24 / 2^25   -- |c|  <= 2^38
+// t9 -= c * 2^25           -- |t9| <= 2^24        < 1.1 * 2^24  (final t9)
+// t0 += c * 19             -- |t0| <= 2^25 + 2^38*19 < 2^44
+//
+// c   = t0 + 2^25 / 2^26   -- |c|  <= 2^18
+// t0 -= c * 2^26           -- |t0| <= 2^25        < 1.1 * 2^25  (final t0)
+// t1 += c                  -- |t1| <= 2^24 + 2^18 < 1.1 * 2^24  (final t1)
+//
+// Postcondition
+// -------------
+//   |t0|, |t2|, |t4|, |t6|, |t8|  <  1.1 * 2^25
+//   |t1|, |t3|, |t5|, |t7|, |t9|  <  1.1 * 2^24
+#define FE_CARRY	\
+	i64 c; /* carry; expects i64 t0..t9 and the output fe h in the enclosing scope */ \
+	c = (t0 + ((i64)1<<25)) >> 26;  t0 -= c * ((i64)1 << 26);  t1 += c; \
+	c = (t4 + ((i64)1<<25)) >> 26;  t4 -= c * ((i64)1 << 26);  t5 += c; \
+	c = (t1 + ((i64)1<<24)) >> 25;  t1 -= c * ((i64)1 << 25);  t2 += c; \
+	c = (t5 + ((i64)1<<24)) >> 25;  t5 -= c * ((i64)1 << 25);  t6 += c; \
+	c = (t2 + ((i64)1<<25)) >> 26;  t2 -= c * ((i64)1 << 26);  t3 += c; \
+	c = (t6 + ((i64)1<<25)) >> 26;  t6 -= c * ((i64)1 << 26);  t7 += c; \
+	c = (t3 + ((i64)1<<24)) >> 25;  t3 -= c * ((i64)1 << 25);  t4 += c; \
+	c = (t7 + ((i64)1<<24)) >> 25;  t7 -= c * ((i64)1 << 25);  t8 += c; \
+	c = (t4 + ((i64)1<<25)) >> 26;  t4 -= c * ((i64)1 << 26);  t5 += c; \
+	c = (t8 + ((i64)1<<25)) >> 26;  t8 -= c * ((i64)1 << 26);  t9 += c; \
+	c = (t9 + ((i64)1<<24)) >> 25;  t9 -= c * ((i64)1 << 25);  t0 += c * 19; \
+	c = (t0 + ((i64)1<<25)) >> 26;  t0 -= c * ((i64)1 << 26);  t1 += c; \
+	h[0]=(i32)t0;  h[1]=(i32)t1;  h[2]=(i32)t2;  h[3]=(i32)t3;  h[4]=(i32)t4; \
+	h[5]=(i32)t5;  h[6]=(i32)t6;  h[7]=(i32)t7;  h[8]=(i32)t8;  h[9]=(i32)t9 // no trailing semicolon: supplied at the use site
+
+// Decodes a field element from a byte buffer.
+// mask specifies how many bits we ignore.
+// Traditionally we ignore 1. It's useful for EdDSA,
+// which uses that bit to denote the sign of x.
+// Elligator however uses positive representatives,
+// which means ignoring 2 bits instead.
+static void fe_frombytes_mask(fe h, const u8 s[32], unsigned nb_mask)
+{ // Loads 32 little-endian bytes into h, dropping the top nb_mask bits.
+	u32 mask = 0xffffff >> nb_mask;            // keeps the low (24 - nb_mask) bits of the last 3 bytes
+	i64 t0 =  load32_le(s);                    // t0 < 2^32
+	i64 t1 =  load24_le(s +  4) << 6;          // t1 < 2^30
+	i64 t2 =  load24_le(s +  7) << 5;          // t2 < 2^29
+	i64 t3 =  load24_le(s + 10) << 3;          // t3 < 2^27
+	i64 t4 =  load24_le(s + 13) << 2;          // t4 < 2^26
+	i64 t5 =  load32_le(s + 16);               // t5 < 2^32
+	i64 t6 =  load24_le(s + 20) << 7;          // t6 < 2^31
+	i64 t7 =  load24_le(s + 23) << 5;          // t7 < 2^29
+	i64 t8 =  load24_le(s + 26) << 4;          // t8 < 2^28
+	i64 t9 = (load24_le(s + 29) & mask) << 2;  // t9 < 2^25
+	FE_CARRY;                                  // Carry precondition OK; also stores t0..t9 into h
+}
+
+static void fe_frombytes(fe h, const u8 s[32])
+{ // Usual decoding: ignores only the top bit of s.
+	fe_frombytes_mask(h, s, 1);
+}
+
+
+// Precondition
+//   |h[0]|, |h[2]|, |h[4]|, |h[6]|, |h[8]|  <  1.1 * 2^25
+//   |h[1]|, |h[3]|, |h[5]|, |h[7]|, |h[9]|  <  1.1 * 2^24
+//
+// Therefore, |h| < 2^255-19
+// There are two possibilities:
+//
+// - If h is positive, all we need to do is reduce its individual
+//   limbs down to their tight positive range.
+// - If h is negative, we also need to add 2^255-19 to it.
+//   Or just remove 19 and chop off any excess bit.
+static void fe_tobytes(u8 s[32], const fe h) // serialises h into the 32 bytes of s
+{
+	i32 t[10]; // working copy of h, wiped before returning
+	COPY(t, h, 10);
+	i32 q = (19 * t[9] + (((i32) 1) << 24)) >> 25;
+	//                 |t9|                    < 1.1 * 2^24
+	//  -1.1 * 2^24  <  t9                     < 1.1 * 2^24
+	//  -21  * 2^24  <  19 * t9                < 21  * 2^24
+	//  -2^29        <  19 * t9 + 2^24         < 2^29
+	//  -2^29 / 2^25 < (19 * t9 + 2^24) / 2^25 < 2^29 / 2^25
+	//  -16          < (19 * t9 + 2^24) / 2^25 < 16
+	FOR (i, 0, 5) { // first pass: only propagates the carry to learn the sign of h
+		q += t[2*i  ]; q >>= 26; // q = 0 or -1
+		q += t[2*i+1]; q >>= 25; // q = 0 or -1
+	}
+	// q =  0 iff h >= 0
+	// q = -1 iff h <  0
+	// Adding q * 19 to h reduces h to its proper range.
+	q *= 19;  // Shift carry back to the beginning
+	FOR (i, 0, 5) { // second pass: even limbs keep 26 bits, odd limbs keep 25
+		t[i*2  ] += q;  q = t[i*2  ] >> 26;  t[i*2  ] -= q * ((i32)1 << 26);
+		t[i*2+1] += q;  q = t[i*2+1] >> 25;  t[i*2+1] -= q * ((i32)1 << 25);
+	}
+	// h is now fully reduced, and q represents the excess bit.
+	// Pack the ten limbs into eight 32-bit words, four bytes of s at a time.
+	store32_le(s +  0, ((u32)t[0] >>  0) | ((u32)t[1] << 26));
+	store32_le(s +  4, ((u32)t[1] >>  6) | ((u32)t[2] << 19));
+	store32_le(s +  8, ((u32)t[2] >> 13) | ((u32)t[3] << 13));
+	store32_le(s + 12, ((u32)t[3] >> 19) | ((u32)t[4] <<  6));
+	store32_le(s + 16, ((u32)t[5] >>  0) | ((u32)t[6] << 25));
+	store32_le(s + 20, ((u32)t[6] >>  7) | ((u32)t[7] << 19));
+	store32_le(s + 24, ((u32)t[7] >> 13) | ((u32)t[8] << 12));
+	store32_le(s + 28, ((u32)t[8] >> 20) | ((u32)t[9] <<  6));
+
+	WIPE_BUFFER(t);
+}
+
+// Multiplies the field element f by the scalar g:  h = f * g
+// Precondition
+// -------------
+//   |f0|, |f2|, |f4|, |f6|, |f8|  <  1.65 * 2^26
+//   |f1|, |f3|, |f5|, |f7|, |f9|  <  1.65 * 2^25
+//   g is a plain i32 scalar, not a field element: |g| <= 2^31,
+//   so every 64-bit product computed below stays under 2^58.
+static void fe_mul_small(fe h, const fe f, i32 g)
+{
+	i64 t0 = f[0] * (i64) g;  i64 t1 = f[1] * (i64) g;
+	i64 t2 = f[2] * (i64) g;  i64 t3 = f[3] * (i64) g;
+	i64 t4 = f[4] * (i64) g;  i64 t5 = f[5] * (i64) g;
+	i64 t6 = f[6] * (i64) g;  i64 t7 = f[7] * (i64) g;
+	i64 t8 = f[8] * (i64) g;  i64 t9 = f[9] * (i64) g;
+	// |t0|, |t2|, |t4|, |t6|, |t8|  <  1.65 * 2^26 * 2^31  < 2^58
+	// |t1|, |t3|, |t5|, |t7|, |t9|  <  1.65 * 2^25 * 2^31  < 2^57
+
+	FE_CARRY; // Carry precondition OK, result goes to h
+}
+
+// Field multiplication:  h = f * g
+// Precondition
+// -------------
+//   |f0|, |f2|, |f4|, |f6|, |f8|  <  1.65 * 2^26
+//   |f1|, |f3|, |f5|, |f7|, |f9|  <  1.65 * 2^25
+//   |g0|, |g2|, |g4|, |g6|, |g8|  <  1.65 * 2^26
+//   |g1|, |g3|, |g5|, |g7|, |g9|  <  1.65 * 2^25
+static void fe_mul(fe h, const fe f, const fe g)
+{
+	// Everything is unrolled and put in temporary variables.
+	// We could roll the loop, but that would make curve25519 twice as slow.
+	i32 f0 = f[0]; i32 f1 = f[1]; i32 f2 = f[2]; i32 f3 = f[3]; i32 f4 = f[4];
+	i32 f5 = f[5]; i32 f6 = f[6]; i32 f7 = f[7]; i32 f8 = f[8]; i32 f9 = f[9];
+	i32 g0 = g[0]; i32 g1 = g[1]; i32 g2 = g[2]; i32 g3 = g[3]; i32 g4 = g[4];
+	i32 g5 = g[5]; i32 g6 = g[6]; i32 g7 = g[7]; i32 g8 = g[8]; i32 g9 = g[9];
+	i32 F1 = f1*2; i32 F3 = f3*2; i32 F5 = f5*2; i32 F7 = f7*2; i32 F9 = f9*2;
+	i32 G1 = g1*19;  i32 G2 = g2*19;  i32 G3 = g3*19;
+	i32 G4 = g4*19;  i32 G5 = g5*19;  i32 G6 = g6*19;
+	i32 G7 = g7*19;  i32 G8 = g8*19;  i32 G9 = g9*19;
+	// |F1|, |F3|, |F5|, |F7|, |F9|  <  1.65 * 2^26
+	// |G2|, |G4|, |G6|, |G8|        <  2^31
+	// |G1|, |G3|, |G5|, |G7|, |G9|  <  2^30
+	// Fk = 2*fk and Gk = 19*gk still fit in 32 bits; products are done in 64.
+	i64 t0 = f0*(i64)g0 + F1*(i64)G9 + f2*(i64)G8 + F3*(i64)G7 + f4*(i64)G6
+		+    F5*(i64)G5 + f6*(i64)G4 + F7*(i64)G3 + f8*(i64)G2 + F9*(i64)G1;
+	i64 t1 = f0*(i64)g1 + f1*(i64)g0 + f2*(i64)G9 + f3*(i64)G8 + f4*(i64)G7
+		+    f5*(i64)G6 + f6*(i64)G5 + f7*(i64)G4 + f8*(i64)G3 + f9*(i64)G2;
+	i64 t2 = f0*(i64)g2 + F1*(i64)g1 + f2*(i64)g0 + F3*(i64)G9 + f4*(i64)G8
+		+    F5*(i64)G7 + f6*(i64)G6 + F7*(i64)G5 + f8*(i64)G4 + F9*(i64)G3;
+	i64 t3 = f0*(i64)g3 + f1*(i64)g2 + f2*(i64)g1 + f3*(i64)g0 + f4*(i64)G9
+		+    f5*(i64)G8 + f6*(i64)G7 + f7*(i64)G6 + f8*(i64)G5 + f9*(i64)G4;
+	i64 t4 = f0*(i64)g4 + F1*(i64)g3 + f2*(i64)g2 + F3*(i64)g1 + f4*(i64)g0
+		+    F5*(i64)G9 + f6*(i64)G8 + F7*(i64)G7 + f8*(i64)G6 + F9*(i64)G5;
+	i64 t5 = f0*(i64)g5 + f1*(i64)g4 + f2*(i64)g3 + f3*(i64)g2 + f4*(i64)g1
+		+    f5*(i64)g0 + f6*(i64)G9 + f7*(i64)G8 + f8*(i64)G7 + f9*(i64)G6;
+	i64 t6 = f0*(i64)g6 + F1*(i64)g5 + f2*(i64)g4 + F3*(i64)g3 + f4*(i64)g2
+		+    F5*(i64)g1 + f6*(i64)g0 + F7*(i64)G9 + f8*(i64)G8 + F9*(i64)G7;
+	i64 t7 = f0*(i64)g7 + f1*(i64)g6 + f2*(i64)g5 + f3*(i64)g4 + f4*(i64)g3
+		+    f5*(i64)g2 + f6*(i64)g1 + f7*(i64)g0 + f8*(i64)G9 + f9*(i64)G8;
+	i64 t8 = f0*(i64)g8 + F1*(i64)g7 + f2*(i64)g6 + F3*(i64)g5 + f4*(i64)g4
+		+    F5*(i64)g3 + f6*(i64)g2 + F7*(i64)g1 + f8*(i64)g0 + F9*(i64)G9;
+	i64 t9 = f0*(i64)g9 + f1*(i64)g8 + f2*(i64)g7 + f3*(i64)g6 + f4*(i64)g5
+		+    f5*(i64)g4 + f6*(i64)g3 + f7*(i64)g2 + f8*(i64)g1 + f9*(i64)g0;
+	// t0 < 0.67 * 2^61
+	// t1 < 0.41 * 2^61
+	// t2 < 0.52 * 2^61
+	// t3 < 0.32 * 2^61
+	// t4 < 0.38 * 2^61
+	// t5 < 0.22 * 2^61
+	// t6 < 0.23 * 2^61
+	// t7 < 0.13 * 2^61
+	// t8 < 0.09 * 2^61
+	// t9 < 0.03 * 2^61
+
+	FE_CARRY; // Everything below 2^62, Carry precondition OK
+}
+
+// Field squaring:  h = f * f
+// Precondition
+// -------------
+//   |f0|, |f2|, |f4|, |f6|, |f8|  <  1.65 * 2^26
+//   |f1|, |f3|, |f5|, |f7|, |f9|  <  1.65 * 2^25
+// Note: we could use fe_mul() for this, but this is significantly faster
+static void fe_sq(fe h, const fe f)
+{
+	i32 f0 = f[0]; i32 f1 = f[1]; i32 f2 = f[2]; i32 f3 = f[3]; i32 f4 = f[4];
+	i32 f5 = f[5]; i32 f6 = f[6]; i32 f7 = f[7]; i32 f8 = f[8]; i32 f9 = f[9];
+	i32 f0_2  = f0*2;   i32 f1_2  = f1*2;   i32 f2_2  = f2*2;   i32 f3_2 = f3*2;
+	i32 f4_2  = f4*2;   i32 f5_2  = f5*2;   i32 f6_2  = f6*2;   i32 f7_2 = f7*2;
+	i32 f5_38 = f5*38;  i32 f6_19 = f6*19;  i32 f7_38 = f7*38;
+	i32 f8_19 = f8*19;  i32 f9_38 = f9*38;
+	// |f0_2| , |f2_2| , |f4_2| , |f6_2| , |f8_2|  <  1.65 * 2^27
+	// |f1_2| , |f3_2| , |f5_2| , |f7_2| , |f9_2|  <  1.65 * 2^26
+	// |f5_38|, |f6_19|, |f7_38|, |f8_19|, |f9_38| <  2^31
+	// fk_2, fk_19 and fk_38 are fk times 2, 19 and 38, precomputed in 32 bits.
+	i64 t0 = f0  *(i64)f0    + f1_2*(i64)f9_38 + f2_2*(i64)f8_19
+		+    f3_2*(i64)f7_38 + f4_2*(i64)f6_19 + f5  *(i64)f5_38;
+	i64 t1 = f0_2*(i64)f1    + f2  *(i64)f9_38 + f3_2*(i64)f8_19
+		+    f4  *(i64)f7_38 + f5_2*(i64)f6_19;
+	i64 t2 = f0_2*(i64)f2    + f1_2*(i64)f1    + f3_2*(i64)f9_38
+		+    f4_2*(i64)f8_19 + f5_2*(i64)f7_38 + f6  *(i64)f6_19;
+	i64 t3 = f0_2*(i64)f3    + f1_2*(i64)f2    + f4  *(i64)f9_38
+		+    f5_2*(i64)f8_19 + f6  *(i64)f7_38;
+	i64 t4 = f0_2*(i64)f4    + f1_2*(i64)f3_2  + f2  *(i64)f2
+		+    f5_2*(i64)f9_38 + f6_2*(i64)f8_19 + f7  *(i64)f7_38;
+	i64 t5 = f0_2*(i64)f5    + f1_2*(i64)f4    + f2_2*(i64)f3
+		+    f6  *(i64)f9_38 + f7_2*(i64)f8_19;
+	i64 t6 = f0_2*(i64)f6    + f1_2*(i64)f5_2  + f2_2*(i64)f4
+		+    f3_2*(i64)f3    + f7_2*(i64)f9_38 + f8  *(i64)f8_19;
+	i64 t7 = f0_2*(i64)f7    + f1_2*(i64)f6    + f2_2*(i64)f5
+		+    f3_2*(i64)f4    + f8  *(i64)f9_38;
+	i64 t8 = f0_2*(i64)f8    + f1_2*(i64)f7_2  + f2_2*(i64)f6
+		+    f3_2*(i64)f5_2  + f4  *(i64)f4    + f9  *(i64)f9_38;
+	i64 t9 = f0_2*(i64)f9    + f1_2*(i64)f8    + f2_2*(i64)f7
+		+    f3_2*(i64)f6    + f4  *(i64)f5_2;
+	// t0 < 0.67 * 2^61
+	// t1 < 0.41 * 2^61
+	// t2 < 0.52 * 2^61
+	// t3 < 0.32 * 2^61
+	// t4 < 0.38 * 2^61
+	// t5 < 0.22 * 2^61
+	// t6 < 0.23 * 2^61
+	// t7 < 0.13 * 2^61
+	// t8 < 0.09 * 2^61
+	// t9 < 0.03 * 2^61
+
+	FE_CARRY; // Same bounds as listed above, result goes to h
+}
+
+// Parity of the fully reduced value of f: 1 if odd, 0 if even.
+static int fe_isodd(const fe f)
+{
+	u8 encoded[32];
+	fe_tobytes(encoded, f);
+	u8 low_bit = encoded[0] & 1; // parity is bit 0 of the first byte
+	WIPE_BUFFER(encoded);
+	return low_bit;
+}
+
+// Compares f and g through their serialised forms: 1 if equal, 0 if not
+static int fe_isequal(const fe f, const fe g)
+{
+	u8 g_bytes[32];
+	u8 f_bytes[32];
+	fe_tobytes(g_bytes, g);
+	fe_tobytes(f_bytes, f);
+	int verdict = crypto_verify32(f_bytes, g_bytes); // 0 when identical
+	WIPE_BUFFER(g_bytes);
+	WIPE_BUFFER(f_bytes);
+	return verdict + 1;
+}
+
+// Inverse square root.
+// Returns true if x is a square, false otherwise.
+// After the call:
+//   isr = sqrt(1/x)        if x is a non-zero square.
+//   isr = sqrt(sqrt(-1)/x) if x is not a square.
+//   isr = 0                if x is zero.
+// We do not guarantee the sign of the square root.
+//
+// Notes:
+// Let quartic = x^((p-1)/4)
+//
+// x^((p-1)/2) = chi(x)
+// quartic^2   = chi(x)
+// quartic     = sqrt(chi(x))
+// quartic     = 1 or -1 or sqrt(-1) or -sqrt(-1)
+//
+// Note that x is a square if quartic is 1 or -1
+// There are 4 cases to consider:
+//
+// if   quartic         = 1  (x is a square)
+// then x^((p-1)/4)     = 1
+//      x^((p-5)/4) * x = 1
+//      x^((p-5)/4)     = 1/x
+//      x^((p-5)/8)     = sqrt(1/x) or -sqrt(1/x)
+//
+// if   quartic                = -1  (x is a square)
+// then x^((p-1)/4)            = -1
+//      x^((p-5)/4) * x        = -1
+//      x^((p-5)/4)            = -1/x
+//      x^((p-5)/8)            = sqrt(-1)   / sqrt(x)
+//      x^((p-5)/8) * sqrt(-1) = sqrt(-1)^2 / sqrt(x)
+//      x^((p-5)/8) * sqrt(-1) = -1/sqrt(x)
+//      x^((p-5)/8) * sqrt(-1) = -sqrt(1/x) or sqrt(1/x)
+//
+// if   quartic         = sqrt(-1)  (x is not a square)
+// then x^((p-1)/4)     = sqrt(-1)
+//      x^((p-5)/4) * x = sqrt(-1)
+//      x^((p-5)/4)     = sqrt(-1)/x
+//      x^((p-5)/8)     = sqrt(sqrt(-1)/x) or -sqrt(sqrt(-1)/x)
+//
+// Note that the product of two non-squares is always a square:
+//   For any non-squares a and b, chi(a) = -1 and chi(b) = -1.
+//   Since chi(x) = x^((p-1)/2), chi(a)*chi(b) = chi(a*b) = 1.
+//   Therefore a*b is a square.
+//
+//   Since sqrt(-1) and x are both non-squares, their product is a
+//   square, and we can compute their square root.
+//
+// if   quartic                = -sqrt(-1)  (x is not a square)
+// then x^((p-1)/4)            = -sqrt(-1)
+//      x^((p-5)/4) * x        = -sqrt(-1)
+//      x^((p-5)/4)            = -sqrt(-1)/x
+//      x^((p-5)/8)            = sqrt(-sqrt(-1)/x)
+//      x^((p-5)/8)            = sqrt( sqrt(-1)/x) * sqrt(-1)
+//      x^((p-5)/8) * sqrt(-1) = sqrt( sqrt(-1)/x) * sqrt(-1)^2
+//      x^((p-5)/8) * sqrt(-1) = sqrt( sqrt(-1)/x) * -1
+//      x^((p-5)/8) * sqrt(-1) = -sqrt(sqrt(-1)/x) or sqrt(sqrt(-1)/x)
+static int invsqrt(fe isr, const fe x)
+{
+	fe t0, t1, t2;
+
+	// t0 = x^((p-5)/8)
+	// Can be achieved with a simple double & add ladder,
+	// but it would be slower.
+	fe_sq(t0, x);
+	fe_sq(t1,t0);                   fe_sq(t1, t1);  fe_mul(t1, x, t1);
+	fe_mul(t0, t0, t1);
+	fe_sq(t0, t0);                                  fe_mul(t0, t1, t0);
+	fe_sq(t1, t0);  FOR (i, 1,   5) fe_sq(t1, t1);  fe_mul(t0, t1, t0);
+	fe_sq(t1, t0);  FOR (i, 1,  10) fe_sq(t1, t1);  fe_mul(t1, t1, t0);
+	fe_sq(t2, t1);  FOR (i, 1,  20) fe_sq(t2, t2);  fe_mul(t1, t2, t1);
+	fe_sq(t1, t1);  FOR (i, 1,  10) fe_sq(t1, t1);  fe_mul(t0, t1, t0);
+	fe_sq(t1, t0);  FOR (i, 1,  50) fe_sq(t1, t1);  fe_mul(t1, t1, t0);
+	fe_sq(t2, t1);  FOR (i, 1, 100) fe_sq(t2, t2);  fe_mul(t1, t2, t1);
+	fe_sq(t1, t1);  FOR (i, 1,  50) fe_sq(t1, t1);  fe_mul(t0, t1, t0);
+	fe_sq(t0, t0);  FOR (i, 1,   2) fe_sq(t0, t0);  fe_mul(t0, t0, x);
+
+	// quartic = x^((p-1)/4)
+	i32 *quartic = t1; // reuses the storage of t1
+	fe_sq (quartic, t0);
+	fe_mul(quartic, quartic, x);
+
+	i32 *check = t2; // reuses the storage of t2
+	fe_0  (check);          int z0 = fe_isequal(x      , check); // x == 0
+	fe_1  (check);          int p1 = fe_isequal(quartic, check); // quartic == 1
+	fe_neg(check, check );  int m1 = fe_isequal(quartic, check); // quartic == -1
+	fe_neg(check, sqrtm1);  int ms = fe_isequal(quartic, check); // quartic == -sqrt(-1)
+
+	// if quartic == -1 or -sqrt(-1)
+	// then  isr = x^((p-5)/8) * sqrt(-1)
+	// else  isr = x^((p-5)/8)
+	fe_mul(isr, t0, sqrtm1);
+	fe_ccopy(isr, t0, 1 - (m1 | ms));
+
+	WIPE_BUFFER(t0);
+	WIPE_BUFFER(t1);
+	WIPE_BUFFER(t2);
+	return p1 | m1 | z0; // 1 when quartic is 1 or -1, or when x is zero
+}
+
+// Inverse in terms of inverse square root.
+// Requires two additional squarings to get rid of the sign.
+//
+//   1/x = x * (+invsqrt(x^2))^2
+//       = x * (-invsqrt(x^2))^2
+//
+// A fully optimised exponentiation by p-2 would save 6 field
+// multiplications, but it would require more code.
+static void fe_invert(fe out, const fe x)
+{
+	fe tmp;
+	fe_sq(tmp, x); // tmp = x^2
+	invsqrt(tmp, tmp); // tmp = +-sqrt(1/x^2), or 0 if x is zero
+	fe_sq(tmp, tmp); // tmp = 1/x^2, the sign is squared away
+	fe_mul(out, tmp, x); // out = x / x^2 = 1/x
+	WIPE_BUFFER(tmp);
+}
+
+// Clamp a scalar: clear bits 0-2 and bit 255, then set bit 254.
+void crypto_eddsa_trim_scalar(u8 out[32], const u8 in[32])
+{
+	COPY(out, in, 32);
+	out[31] = (u8)((out[31] & 0x7f) | 0x40); // last byte: bit 7 off, bit 6 on
+	out[ 0] = (u8)(out[ 0] & 0xf8);          // first byte: three low bits off
+	// (0x7f == 127, 0x40 == 64, 0xf8 == 248)
+}
+
+// Bit number i of the scalar s (bit 0 is the low bit of s[0])
+static int scalar_bit(const u8 s[32], int i)
+{
+	// negative positions read as 0: sliding windows may ask for bit -1
+	return i < 0 ? 0 : ((s[i / 8] >> (i % 8)) & 1);
+}
+
+///////////////
+/// X-25519 /// Taken from SUPERCOP's ref10 implementation.
+///////////////
+static void scalarmult(u8 q[32], const u8 scalar[32], const u8 p[32],
+                       int nb_bits)
+{
+	// decodes the x coordinate of the input point p
+	fe x1;
+	fe_frombytes(x1, p);
+
+	// computes the actual scalar product (the result is in x2 and z2)
+	fe x2, z2, x3, z3, t0, t1;
+	// Montgomery ladder
+	// In projective coordinates, to avoid divisions: x = X / Z
+	// We don't care about the y coordinate, it's only 1 bit of information
+	fe_1(x2);        fe_0(z2); // "zero" point
+	fe_copy(x3, x1); fe_1(z3); // "one"  point
+	int swap = 0;
+	for (int pos = nb_bits-1; pos >= 0; --pos) { // from the highest bit down to bit 0
+		// constant time conditional swap before ladder step
+		int b = scalar_bit(scalar, pos);
+		swap ^= b; // xor trick avoids swapping at the end of the loop
+		fe_cswap(x2, x3, swap);
+		fe_cswap(z2, z3, swap);
+		swap = b;  // anticipates one last swap after the loop
+
+		// Montgomery ladder step: replaces (P2, P3) by (P2*2, P2+P3)
+		// with differential addition
+		fe_sub(t0, x3, z3);
+		fe_sub(t1, x2, z2);
+		fe_add(x2, x2, z2);
+		fe_add(z2, x3, z3);
+		fe_mul(z3, t0, x2);
+		fe_mul(z2, z2, t1);
+		fe_sq (t0, t1    );
+		fe_sq (t1, x2    );
+		fe_add(x3, z3, z2);
+		fe_sub(z2, z3, z2);
+		fe_mul(x2, t1, t0);
+		fe_sub(t1, t1, t0);
+		fe_sq (z2, z2    );
+		fe_mul_small(z3, t1, 121666);
+		fe_sq (x3, x3    );
+		fe_add(t0, t0, z3);
+		fe_mul(z3, x1, z2);
+		fe_mul(z2, t1, t0);
+	}
+	// last swap is necessary to compensate for the xor trick
+	// Note: after this swap, P3 == P2 + P1.
+	fe_cswap(x2, x3, swap);
+	fe_cswap(z2, z3, swap);
+
+	// normalises the coordinates: x == X / Z
+	fe_invert(z2, z2); // z2 = 1 / Z
+	fe_mul(x2, x2, z2); // x2 = X / Z
+	fe_tobytes(q, x2); // the encoded x coordinate is the output
+
+	WIPE_BUFFER(x1);
+	WIPE_BUFFER(x2);  WIPE_BUFFER(z2);  WIPE_BUFFER(t0);
+	WIPE_BUFFER(x3);  WIPE_BUFFER(z3);  WIPE_BUFFER(t1);
+}
+
+void crypto_x25519(u8       raw_shared_secret[32],
+                   const u8 your_secret_key  [32],
+                   const u8 their_public_key [32])
+{
+	// clamp a private copy of the secret key, then run the ladder on it
+	u8 clamped[32];
+	crypto_eddsa_trim_scalar(clamped, your_secret_key);
+	scalarmult(raw_shared_secret, clamped, their_public_key, 255);
+	WIPE_BUFFER(clamped);
+}
+
+void crypto_x25519_public_key(u8       public_key[32],
+                              const u8 secret_key[32])
+{
+	static const u8 generator[32] = {9}; // first byte is 9, the rest are zero
+	crypto_x25519(public_key, secret_key, generator);
+}
+
+///////////////////////////
+/// Arithmetic modulo L ///
+///////////////////////////
+static const u32 L[8] = { // the modulus L as 32-bit limbs, least significant first
+	0x5cf5d3ed, 0x5812631a, 0xa2f79cd6, 0x14def9de,
+	0x00000000, 0x00000000, 0x00000000, 0x10000000,
+};
+
+// p = a*b + p[0..7]  (the initial contents of p[8..15] are overwritten)
+static void multiply(u32 p[16], const u32 a[8], const u32 b[8])
+{
+	FOR (row, 0, 8) {
+		u64 acc = 0; // running carry for this row of the schoolbook product
+		FOR (col, 0, 8) {
+			acc        += (u64)a[row] * b[col] + p[row+col];
+			p[row+col]  = (u32)acc;
+			acc       >>= 32;
+		}
+		p[row+8] = (u32)acc; // the row's final carry lands one limb further up
+	}
+}
+
+static int is_above_l(const u32 x[8])
+{
+	// Computes x + (-L) in 2's complement, where -L == ~L + 1,
+	// and keeps nothing but the carry out of the top limb.
+	u64 acc = 1; // the "+ 1" of the negation
+	FOR (limb, 0, 8) {
+		acc  += (~L[limb] & 0xffffffff) + (u64)x[limb];
+		acc >>= 32;
+	}
+	return (int)acc; // 0 or 1; it is 1 exactly when x >= L
+}
+
+// Last step of a reduction modulo L: subtracts L at most once.
+// if x < l       , then r = x
+// if l <= x < 2*l, then r = x-l
+// any other input gives a wrong result
+static void remove_l(u32 r[8], const u32 x[8])
+{
+	u64 acc    = (u64)is_above_l(x); // 0 or 1; also the "+ 1" of -L == ~L + 1
+	u32 l_mask = (u32)0 - (u32)acc;  // all ones when we subtract, zero otherwise
+	FOR (limb, 0, 8) {
+		acc     += (~L[limb] & l_mask) + (u64)x[limb];
+		r[limb]  = (u32)acc;
+		acc    >>= 32;
+	}
+}
+
+// Full reduction modulo L (Barrett reduction)
+static void mod_l(u8 reduced[32], const u32 x[16])
+{
+	static const u32 r[9] = { // NOTE(review): presumably floor(2^512 / L) -- confirm
+		0x0a2c131b,0xed9ce5a3,0x086329a7,0x2106215d,
+		0xffffffeb,0xffffffff,0xffffffff,0xffffffff,0xf,
+	};
+	// xr = x * r
+	u32 xr[25] = {0}; // 16 + 9 limbs: room for the full product
+	FOR (i, 0, 9) {
+		u64 carry = 0;
+		FOR (j, 0, 16) {
+			carry  += xr[i+j] + (u64)r[i] * x[j];
+			xr[i+j] = (u32)carry;
+			carry >>= 32;
+		}
+		xr[i+16] = (u32)carry;
+	}
+	// xr = floor(xr / 2^512) * L
+	// Since the result is guaranteed to be below 2*L,
+	// it is enough to only compute the first 256 bits.
+	// The division is performed by reading xr[i+16]. (16 * 32 = 512)
+	ZERO(xr, 8); // the low 8 limbs are reused to hold the product with L
+	FOR (i, 0, 8) {
+		u64 carry = 0;
+		FOR (j, 0, 8-i) { // limbs at index 8 and above are not needed
+			carry   += xr[i+j] + (u64)xr[i+16] * L[j];
+			xr[i+j] = (u32)carry;
+			carry >>= 32;
+		}
+	}
+	// xr = x - xr
+	u64 carry = 1; // 2's complement subtraction: x + ~xr + 1
+	FOR (i, 0, 8) {
+		carry  += (u64)x[i] + (~xr[i] & 0xffffffff);
+		xr[i]   = (u32)carry;
+		carry >>= 32;
+	}
+	// Final reduction modulo L (conditional subtraction)
+	remove_l(xr, xr);
+	store32_le_buf(reduced, xr, 8);
+
+	WIPE_BUFFER(xr);
+}
+
+void crypto_eddsa_reduce(u8 reduced[32], const u8 expanded[64])
+{
+	u32 limbs[16]; // the 64 input bytes as sixteen 32-bit words
+	load32_le_buf(limbs, expanded, 16);
+	mod_l(reduced, limbs);
+	WIPE_BUFFER(limbs);
+}
+
+// r = (a * b) + c, reduced modulo L
+void crypto_eddsa_mul_add(u8 r[32],
+                          const u8 a[32], const u8 b[32], const u8 c[32])
+{
+	u32 lhs[8];   load32_le_buf(lhs, a, 8);
+	u32 rhs[8];   load32_le_buf(rhs, b, 8);
+	u32 acc[16];  load32_le_buf(acc, c, 8);  ZERO(acc + 8, 8);
+	multiply(acc, lhs, rhs); // acc = lhs * rhs + c
+	mod_l(r, acc);
+	WIPE_BUFFER(lhs);
+	WIPE_BUFFER(rhs);
+	WIPE_BUFFER(acc);
+}
+
+///////////////
+/// Ed25519 ///
+///////////////
+
+// Point (group element, ge) in a twisted Edwards curve,
+// in extended projective coordinates.
+// ge        : x  = X/Z, y  = Y/Z, T  = XY/Z
+// ge_cached : Yp = Y+X, Ym = Y-X, T2 = T*D2
+// ge_precomp: Z  = 1
+typedef struct { fe X;  fe Y;  fe Z; fe T;  } ge;
+typedef struct { fe Yp; fe Ym; fe Z; fe T2; } ge_cached;
+typedef struct { fe Yp; fe Ym;       fe T2; } ge_precomp;
+
+static void ge_zero(ge *p)
+{
+	fe_1(p->Z); // Z = 1, so x = X and y = Y
+	fe_1(p->Y); // y = 1
+	fe_0(p->X); // x = 0
+	fe_0(p->T); // T = 0
+}
+
+static void ge_tobytes(u8 s[32], const ge *h)
+{
+	fe z_inv, affine_x, affine_y;
+	fe_invert(z_inv, h->Z);
+	fe_mul(affine_y, h->Y, z_inv);
+	fe_mul(affine_x, h->X, z_inv);
+	fe_tobytes(s, affine_y);
+	s[31] ^= fe_isodd(affine_x) << 7; // parity of x goes in the top bit
+	// scrub the temporaries
+	WIPE_BUFFER(affine_y);
+	WIPE_BUFFER(affine_x);
+	WIPE_BUFFER(z_inv);
+}
+
+// h = -s, where s is a point encoded in 32 bytes
+//
+// Variable time!  Inputs must not be secret!
+// => Use only to *check* signatures.
+//
+// From the specifications:
+//   The encoding of s contains y and the sign of x
+//   x = sqrt((y^2 - 1) / (d*y^2 + 1))
+// In extended coordinates:
+//   X = x, Y = y, Z = 1, T = x*y
+//
+//    Note that num * den is a square iff num / den is a square
+//    If num * den is not a square, the point was not on the curve.
+// From the above:
+//   Let num =   y^2 - 1
+//   Let den = d*y^2 + 1
+//   x = sqrt((y^2 - 1) / (d*y^2 + 1))
+//   x = sqrt(num / den)
+//   x = sqrt(num^2 / (num * den))
+//   x = num * sqrt(1 / (num * den))
+//
+// Therefore, we can just compute:
+//   num =   y^2 - 1
+//   den = d*y^2 + 1
+//   isr = invsqrt(num * den)  // abort if not square
+//   x   = num * isr
+// Finally, negate x if its sign is not as specified.
+static int ge_frombytes_neg_vartime(ge *h, const u8 s[32])
+{
+	fe_frombytes(h->Y, s); // y; one bit of s is ignored by the decoder
+	fe_1(h->Z);
+	fe_sq (h->T, h->Y);        // t =   y^2
+	fe_mul(h->X, h->T, d   );  // x = d*y^2
+	fe_sub(h->T, h->T, h->Z);  // t =   y^2 - 1
+	fe_add(h->X, h->X, h->Z);  // x = d*y^2 + 1
+	fe_mul(h->X, h->T, h->X);  // x = (y^2 - 1) * (d*y^2 + 1)
+	int is_square = invsqrt(h->X, h->X); // x = 1/sqrt(num * den) when it is a square
+	if (!is_square) {
+		return -1;             // Not on the curve, abort
+	}
+	fe_mul(h->X, h->T, h->X);  // x = sqrt((y^2 - 1) / (d*y^2 + 1))
+	if (fe_isodd(h->X) == (s[31] >> 7)) { // parity equals the encoded sign bit:
+		fe_neg(h->X, h->X); // flip x, because the result must be -s
+	}
+	fe_mul(h->T, h->X, h->Y); // T = x*y (Z is 1)
+	return 0; // success
+}
+
+static void ge_cache(ge_cached *c, const ge *p)
+{
+	fe_copy(c->Z , p->Z      ); // Z is carried over unchanged
+	fe_mul (c->T2, p->T, D2  ); // T2 = T * D2
+	fe_sub (c->Ym, p->Y, p->X); // Ym = Y - X
+	fe_add (c->Yp, p->Y, p->X); // Yp = Y + X
+}
+
+// Internal buffers are not wiped! Inputs must not be secret!
+// => Use only to *check* signatures.
+static void ge_add(ge *s, const ge *p, const ge_cached *q) // s = p + q; s may be p
+{
+	fe a, b; // scratch values, left unwiped on purpose
+	fe_add(a   , p->Y, p->X ); // a = pY + pX
+	fe_sub(b   , p->Y, p->X ); // b = pY - pX
+	fe_mul(a   , a   , q->Yp); // a = (pY + pX) * qYp
+	fe_mul(b   , b   , q->Ym); // b = (pY - pX) * qYm
+	fe_add(s->Y, a   , b    );
+	fe_sub(s->X, a   , b    );
+
+	fe_add(s->Z, p->Z, p->Z ); // sZ = 2 * pZ
+	fe_mul(s->Z, s->Z, q->Z ); // sZ = 2 * pZ * qZ
+	fe_mul(s->T, p->T, q->T2); // sT = pT * qT2
+	fe_add(a   , s->Z, s->T ); // a and b are reused from here on
+	fe_sub(b   , s->Z, s->T );
+
+	fe_mul(s->T, s->X, s->Y);
+	fe_mul(s->X, s->X, b   );
+	fe_mul(s->Y, s->Y, a   );
+	fe_mul(s->Z, a   , b   );
+}
+
+// Internal buffers are not wiped! Inputs must not be secret!
+// => Use only to *check* signatures.
+static void ge_sub(ge *s, const ge *p, const ge_cached *q)
+{
+	ge_cached minus_q; // -q: Yp and Ym trade places, T2 changes sign
+	fe_neg (minus_q.T2, q->T2);
+	fe_copy(minus_q.Z , q->Z );
+	fe_copy(minus_q.Yp, q->Ym);
+	fe_copy(minus_q.Ym, q->Yp);
+	ge_add(s, p, &minus_q);
+}
+
+static void ge_madd(ge *s, const ge *p, const ge_precomp *q, fe a, fe b) // a, b: scratch
+{
+	fe_add(a   , p->Y, p->X ); // a = pY + pX
+	fe_sub(b   , p->Y, p->X ); // b = pY - pX
+	fe_mul(a   , a   , q->Yp); // a = (pY + pX) * qYp
+	fe_mul(b   , b   , q->Ym); // b = (pY - pX) * qYm
+	fe_add(s->Y, a   , b    );
+	fe_sub(s->X, a   , b    );
+
+	fe_add(s->Z, p->Z, p->Z ); // sZ = 2 * pZ  (ge_precomp has no Z: it is 1)
+	fe_mul(s->T, p->T, q->T2); // sT = pT * qT2
+	fe_add(a   , s->Z, s->T ); // a and b are reused from here on
+	fe_sub(b   , s->Z, s->T );
+
+	fe_mul(s->T, s->X, s->Y);
+	fe_mul(s->X, s->X, b   );
+	fe_mul(s->Y, s->Y, a   );
+	fe_mul(s->Z, a   , b   );
+}
+
+// Internal buffers are not wiped! Inputs must not be secret!
+// => Use only to *check* signatures.
+static void ge_msub(ge *s, const ge *p, const ge_precomp *q, fe a, fe b)
+{
+	ge_precomp minus_q; // -q: Yp and Ym trade places, T2 changes sign
+	fe_neg (minus_q.T2, q->T2);
+	fe_copy(minus_q.Yp, q->Ym);
+	fe_copy(minus_q.Ym, q->Yp);
+	ge_madd(s, p, &minus_q, a, b);
+}
+
+static void ge_double(ge *s, const ge *p, ge *q) // s = 2*p; q is scratch; s may be p
+{
+	fe_sq (q->X, p->X);          // qX = pX^2
+	fe_sq (q->Y, p->Y);          // qY = pY^2
+	fe_sq (q->Z, p->Z);          // qZ = pZ^2
+	fe_mul_small(q->Z, q->Z, 2); // qZ = pZ^2 * 2
+	fe_add(q->T, p->X, p->Y);    // qT = pX + pY
+	fe_sq (s->T, q->T);          // sT = (pX + pY)^2
+	fe_add(q->T, q->Y, q->X);    // qT = pY^2 + pX^2
+	fe_sub(q->Y, q->Y, q->X);    // qY = pY^2 - pX^2
+	fe_sub(q->X, s->T, q->T);    // qX = (pX + pY)^2 - (pY^2 + pX^2)
+	fe_sub(q->Z, q->Z, q->Y);    // qZ = 2*pZ^2 - (pY^2 - pX^2)
+
+	fe_mul(s->X, q->X , q->Z);
+	fe_mul(s->Y, q->T , q->Y);
+	fe_mul(s->Z, q->Y , q->Z);
+	fe_mul(s->T, q->X , q->T);
+}
+
+// 5-bit signed window in cached format (Niels coordinates, Z=1)
+static const ge_precomp b_window[8] = { // entry k is used for the signed digits 2k+1 and -(2k+1)
+	{{25967493,-14356035,29566456,3660896,-12694345,
+	  4014787,27544626,-11754271,-6079156,2047605,},
+	 {-12545711,934262,-2722910,3049990,-727428,
+	  9406986,12720692,5043384,19500929,-15469378,},
+	 {-8738181,4489570,9688441,-14785194,10184609,
+	  -12363380,29287919,11864899,-24514362,-4438546,},},
+	{{15636291,-9688557,24204773,-7912398,616977,
+	  -16685262,27787600,-14772189,28944400,-1550024,},
+	 {16568933,4717097,-11556148,-1102322,15682896,
+	  -11807043,16354577,-11775962,7689662,11199574,},
+	 {30464156,-5976125,-11779434,-15670865,23220365,
+	  15915852,7512774,10017326,-17749093,-9920357,},},
+	{{10861363,11473154,27284546,1981175,-30064349,
+	  12577861,32867885,14515107,-15438304,10819380,},
+	 {4708026,6336745,20377586,9066809,-11272109,
+	  6594696,-25653668,12483688,-12668491,5581306,},
+	 {19563160,16186464,-29386857,4097519,10237984,
+	  -4348115,28542350,13850243,-23678021,-15815942,},},
+	{{5153746,9909285,1723747,-2777874,30523605,
+	  5516873,19480852,5230134,-23952439,-15175766,},
+	 {-30269007,-3463509,7665486,10083793,28475525,
+	  1649722,20654025,16520125,30598449,7715701,},
+	 {28881845,14381568,9657904,3680757,-20181635,
+	  7843316,-31400660,1370708,29794553,-1409300,},},
+	{{-22518993,-6692182,14201702,-8745502,-23510406,
+	  8844726,18474211,-1361450,-13062696,13821877,},
+	 {-6455177,-7839871,3374702,-4740862,-27098617,
+	  -10571707,31655028,-7212327,18853322,-14220951,},
+	 {4566830,-12963868,-28974889,-12240689,-7602672,
+	  -2830569,-8514358,-10431137,2207753,-3209784,},},
+	{{-25154831,-4185821,29681144,7868801,-6854661,
+	  -9423865,-12437364,-663000,-31111463,-16132436,},
+	 {25576264,-2703214,7349804,-11814844,16472782,
+	  9300885,3844789,15725684,171356,6466918,},
+	 {23103977,13316479,9739013,-16149481,817875,
+	  -15038942,8965339,-14088058,-30714912,16193877,},},
+	{{-33521811,3180713,-2394130,14003687,-16903474,
+	  -16270840,17238398,4729455,-18074513,9256800,},
+	 {-25182317,-4174131,32336398,5036987,-21236817,
+	  11360617,22616405,9761698,-19827198,630305,},
+	 {-13720693,2639453,-24237460,-7406481,9494427,
+	  -5774029,-6554551,-15960994,-2449256,-14291300,},},
+	{{-3151181,-5046075,9282714,6866145,-31907062,
+	  -863023,-18940575,15033784,25105118,-7894876,},
+	 {-24326370,15950226,-31801215,-14592823,-11662737,
+	  -5090925,1573892,-2625887,2198790,-15804619,},
+	 {-3099351,10324967,-2241613,7453183,-5446979,
+	  -2735503,-13812022,-16236442,-32461234,-12290683,},},
+};
+
+// Incremental sliding windows (left to right)
+// Based on Roberto Maria Avanzi[2005]
+typedef struct { // sliding-window state for one scalar
+	i16 next_index; // position of the next signed digit (starts at -1)
+	i8  next_digit; // next signed digit (odd number below 2^window_width)
+	u8  next_check; // point at which we must check for a new window
+} slide_ctx;
+
+static void slide_init(slide_ctx *ctx, const u8 scalar[32])
+{
+	// scalar is known to be below L: either we checked it (s), or we
+	// reduced it modulo L (h_ram). L is under 2^253, so bits 253 to
+	// 255 are guaranteed to be zero, and there is no need to test them.
+	//
+	// L is however very close to 2^252, so bit 252 is almost always
+	// zero. Starting the scan at bit 251 would be an off-by-one error
+	// that the tests wouldn't catch (constructing a test that does
+	// would be prohibitively expensive).
+	//
+	// So the scan for the highest set bit starts at bit 252.
+	int top = 252;
+	for (; top > 0; top--) {
+		if (scalar_bit(scalar, top) != 0) { break; }
+	}
+	ctx->next_digit = -1;
+	ctx->next_index = -1;
+	ctx->next_check = (u8)(top + 1);
+}
+
+static int slide_step(slide_ctx *ctx, int width, int i, const u8 scalar[32])
+{
+	if (i == ctx->next_check) { // a new window is only looked for at this position
+		if (scalar_bit(scalar, i) == scalar_bit(scalar, i - 1)) {
+			ctx->next_check--; // bits i and i-1 agree: look again one bit lower
+		} else {
+			// compute digit of next window
+			int w = MIN(width, i + 1); // only i + 1 bits are left (bits 0 to i)
+			int v = -(scalar_bit(scalar, i) << (w-1));
+			FOR_T (int, j, 0, w-1) {
+				v += scalar_bit(scalar, i-(w-1)+j) << j;
+			}
+			v += scalar_bit(scalar, i-w);
+			int lsb = v & (~v + 1); // lowest set bit of v
+			int s   =               // log2(lsb)
+				(((lsb & 0xAA) != 0) << 0) |
+				(((lsb & 0xCC) != 0) << 1) |
+				(((lsb & 0xF0) != 0) << 2);
+			ctx->next_index  = (i16)(i-(w-1)+s);
+			ctx->next_digit  = (i8) (v >> s   );
+			ctx->next_check -= (u8) w;
+		}
+	}
+	return i == ctx->next_index ? ctx->next_digit: 0; // 0: no digit at position i
+}
+
+#define P_W_WIDTH 3 // Window width for the public key; affects the size of the stack
+#define B_W_WIDTH 5 // Window width for the base point; affects the size of the binary
+#define P_W_SIZE  (1<<(P_W_WIDTH-2)) // number of entries in the lutA table
+
+int crypto_eddsa_check_equation(const u8 signature[64], const u8 public_key[32],
+                                const u8 h[32]) // 0 if the equation holds, -1 if not
+{
+	ge minus_A; // -public_key
+	ge minus_R; // -first_half_of_signature
+	const u8 *s = signature + 32; // second half of the signature
+
+	// Check that A and R are on the curve
+	// Check that 0 <= S < L (prevents malleability)
+	// *Allow* non-canonical encoding for A and R
+	{
+		u32 s32[8];
+		load32_le_buf(s32, s, 8);
+		if (ge_frombytes_neg_vartime(&minus_A, public_key) ||
+		    ge_frombytes_neg_vartime(&minus_R, signature)  ||
+		    is_above_l(s32)) {
+			return -1;
+		}
+	}
+
+	// look-up table for minus_A
+	ge_cached lutA[P_W_SIZE];
+	{
+		ge minus_A2, tmp;
+		ge_double(&minus_A2, &minus_A, &tmp);
+		ge_cache(&lutA[0], &minus_A);
+		FOR (i, 1, P_W_SIZE) { // each entry is the previous one plus 2 * minus_A
+			ge_add(&tmp, &minus_A2, &lutA[i-1]);
+			ge_cache(&lutA[i], &tmp);
+		}
+	}
+
+	// sum = [s]B - [h]A
+	// Merged double and add ladder, fused with sliding
+	slide_ctx h_slide;  slide_init(&h_slide, h);
+	slide_ctx s_slide;  slide_init(&s_slide, s);
+	int i = MAX(h_slide.next_check, s_slide.next_check);
+	ge *sum = &minus_A; // reuse minus_A for the sum
+	ge_zero(sum);
+	while (i >= 0) {
+		ge tmp;
+		ge_double(sum, sum, &tmp);
+		int h_digit = slide_step(&h_slide, P_W_WIDTH, i, h);
+		int s_digit = slide_step(&s_slide, B_W_WIDTH, i, s);
+		if (h_digit > 0) { ge_add(sum, sum, &lutA[ h_digit / 2]); }
+		if (h_digit < 0) { ge_sub(sum, sum, &lutA[-h_digit / 2]); }
+		fe t1, t2; // scratch space for ge_madd and ge_msub
+		if (s_digit > 0) { ge_madd(sum, sum, b_window +  s_digit/2, t1, t2); }
+		if (s_digit < 0) { ge_msub(sum, sum, b_window + -s_digit/2, t1, t2); }
+		i--;
+	}
+
+	// Compare [8](sum-R) and the zero point
+	// The multiplication by 8 eliminates any low-order component
+	// and ensures consistency with batched verification.
+	ge_cached cached;
+	u8 check[32];
+	static const u8 zero_point[32] = {1}; // Point of order 1
+	ge_cache(&cached, &minus_R);
+	ge_add(sum, sum, &cached);
+	ge_double(sum, sum, &minus_R); // reuse minus_R as temporary
+	ge_double(sum, sum, &minus_R); // reuse minus_R as temporary
+	ge_double(sum, sum, &minus_R); // reuse minus_R as temporary
+	ge_tobytes(check, sum);
+	return crypto_verify32(check, zero_point);
+}
+
+// 5-bit signed comb in cached format (Niels coordinates, Z=1)
+static const ge_precomp b_comb_low[8] = { // each entry is {Yp, Ym, T2}, ten limbs apiece
+	{{-6816601,-2324159,-22559413,124364,18015490,
+	  8373481,19993724,1979872,-18549925,9085059,},
+	 {10306321,403248,14839893,9633706,8463310,
+	  -8354981,-14305673,14668847,26301366,2818560,},
+	 {-22701500,-3210264,-13831292,-2927732,-16326337,
+	  -14016360,12940910,177905,12165515,-2397893,},},
+	{{-12282262,-7022066,9920413,-3064358,-32147467,
+	  2927790,22392436,-14852487,2719975,16402117,},
+	 {-7236961,-4729776,2685954,-6525055,-24242706,
+	  -15940211,-6238521,14082855,10047669,12228189,},
+	 {-30495588,-12893761,-11161261,3539405,-11502464,
+	  16491580,-27286798,-15030530,-7272871,-15934455,},},
+	{{17650926,582297,-860412,-187745,-12072900,
+	  -10683391,-20352381,15557840,-31072141,-5019061,},
+	 {-6283632,-2259834,-4674247,-4598977,-4089240,
+	  12435688,-31278303,1060251,6256175,10480726,},
+	 {-13871026,2026300,-21928428,-2741605,-2406664,
+	  -8034988,7355518,15733500,-23379862,7489131,},},
+	{{6883359,695140,23196907,9644202,-33430614,
+	  11354760,-20134606,6388313,-8263585,-8491918,},
+	 {-7716174,-13605463,-13646110,14757414,-19430591,
+	  -14967316,10359532,-11059670,-21935259,12082603,},
+	 {-11253345,-15943946,10046784,5414629,24840771,
+	  8086951,-6694742,9868723,15842692,-16224787,},},
+	{{9639399,11810955,-24007778,-9320054,3912937,
+	  -9856959,996125,-8727907,-8919186,-14097242,},
+	 {7248867,14468564,25228636,-8795035,14346339,
+	  8224790,6388427,-7181107,6468218,-8720783,},
+	 {15513115,15439095,7342322,-10157390,18005294,
+	  -7265713,2186239,4884640,10826567,7135781,},},
+	{{-14204238,5297536,-5862318,-6004934,28095835,
+	  4236101,-14203318,1958636,-16816875,3837147,},
+	 {-5511166,-13176782,-29588215,12339465,15325758,
+	  -15945770,-8813185,11075932,-19608050,-3776283,},
+	 {11728032,9603156,-4637821,-5304487,-7827751,
+	  2724948,31236191,-16760175,-7268616,14799772,},},
+	{{-28842672,4840636,-12047946,-9101456,-1445464,
+	  381905,-30977094,-16523389,1290540,12798615,},
+	 {27246947,-10320914,14792098,-14518944,5302070,
+	  -8746152,-3403974,-4149637,-27061213,10749585,},
+	 {25572375,-6270368,-15353037,16037944,1146292,
+	  32198,23487090,9585613,24714571,-1418265,},},
+	{{19844825,282124,-17583147,11004019,-32004269,
+	  -2716035,6105106,-1711007,-21010044,14338445,},
+	 {8027505,8191102,-18504907,-12335737,25173494,
+	  -5923905,15446145,7483684,-30440441,10009108,},
+	 {-14134701,-4174411,10246585,-14677495,33553567,
+	  -14012935,23366126,15080531,-7969992,7663473,},},
+};
+
+static const ge_precomp b_comb_high[8] = { // precomputed comb entries for the high half of the scalar: ge_scalarmult_base looks them up at bit offsets i+128, lookup_add picks one of the 8 with a 3-bit index
+	{{33055887,-4431773,-521787,6654165,951411,
+	  -6266464,-5158124,6995613,-5397442,-6985227,},
+	 {4014062,6967095,-11977872,3960002,8001989,
+	  5130302,-2154812,-1899602,-31954493,-16173976,},
+	 {16271757,-9212948,23792794,731486,-25808309,
+	  -3546396,6964344,-4767590,10976593,10050757,},},
+	{{2533007,-4288439,-24467768,-12387405,-13450051,
+	  14542280,12876301,13893535,15067764,8594792,},
+	 {20073501,-11623621,3165391,-13119866,13188608,
+	  -11540496,-10751437,-13482671,29588810,2197295,},
+	 {-1084082,11831693,6031797,14062724,14748428,
+	  -8159962,-20721760,11742548,31368706,13161200,},},
+	{{2050412,-6457589,15321215,5273360,25484180,
+	  124590,-18187548,-7097255,-6691621,-14604792,},
+	 {9938196,2162889,-6158074,-1711248,4278932,
+	  -2598531,-22865792,-7168500,-24323168,11746309,},
+	 {-22691768,-14268164,5965485,9383325,20443693,
+	  5854192,28250679,-1381811,-10837134,13717818,},},
+	{{-8495530,16382250,9548884,-4971523,-4491811,
+	  -3902147,6182256,-12832479,26628081,10395408,},
+	 {27329048,-15853735,7715764,8717446,-9215518,
+	  -14633480,28982250,-5668414,4227628,242148,},
+	 {-13279943,-7986904,-7100016,8764468,-27276630,
+	  3096719,29678419,-9141299,3906709,11265498,},},
+	{{11918285,15686328,-17757323,-11217300,-27548967,
+	  4853165,-27168827,6807359,6871949,-1075745,},
+	 {-29002610,13984323,-27111812,-2713442,28107359,
+	  -13266203,6155126,15104658,3538727,-7513788,},
+	 {14103158,11233913,-33165269,9279850,31014152,
+	  4335090,-1827936,4590951,13960841,12787712,},},
+	{{1469134,-16738009,33411928,13942824,8092558,
+	  -8778224,-11165065,1437842,22521552,-2792954,},
+	 {31352705,-4807352,-25327300,3962447,12541566,
+	  -9399651,-27425693,7964818,-23829869,5541287,},
+	 {-25732021,-6864887,23848984,3039395,-9147354,
+	  6022816,-27421653,10590137,25309915,-1584678,},},
+	{{-22951376,5048948,31139401,-190316,-19542447,
+	  -626310,-17486305,-16511925,-18851313,-12985140,},
+	 {-9684890,14681754,30487568,7717771,-10829709,
+	  9630497,30290549,-10531496,-27798994,-13812825,},
+	 {5827835,16097107,-24501327,12094619,7413972,
+	  11447087,28057551,-1793987,-14056981,4359312,},},
+	{{26323183,2342588,-21887793,-1623758,-6062284,
+	  2107090,-28724907,9036464,-19618351,-13055189,},
+	 {-29697200,14829398,-4596333,14220089,-30022969,
+	  2955645,12094100,-13693652,-5941445,7047569,},
+	 {-3201977,14413268,-12058324,-16417589,-9035655,
+	  -7224648,9258160,1399236,30397584,-5684634,},},
+};
+
+static void lookup_add(ge *p, ge_precomp *tmp_c, fe tmp_a, fe tmp_b, // p += (signed) comb entry selected by 4 bits of scalar; tmp_* are caller-provided scratch
+                       const ge_precomp comb[8], const u8 scalar[32], int i) // i = lowest of the 4 bit positions read (i, i+32, i+64, i+96)
+{
+	u8 teeth = (u8)((scalar_bit(scalar, i)          ) + // gather 4 scalar bits spaced 32 positions apart
+	                (scalar_bit(scalar, i + 32) << 1) +
+	                (scalar_bit(scalar, i + 64) << 2) +
+	                (scalar_bit(scalar, i + 96) << 3));
+	u8 high  = teeth >> 3; // top tooth: decides whether the entry is used as is (1) or sign-flipped (0)
+	u8 index = (teeth ^ (high - 1)) & 7; // low 3 teeth when high == 1, their bitwise complement when high == 0
+	FOR (j, 0, 8) { // every entry is visited; only the matching one is kept (presumably to avoid a secret-dependent table access)
+		i32 select = 1 & (((j ^ index) - 1) >> 8); // 1 when j == index, 0 otherwise
+		fe_ccopy(tmp_c->Yp, comb[j].Yp, select);
+		fe_ccopy(tmp_c->Ym, comb[j].Ym, select);
+		fe_ccopy(tmp_c->T2, comb[j].T2, select);
+	}
+	fe_neg(tmp_a, tmp_c->T2); // tmp_a = -T2, needed only if the entry must be sign-flipped
+	fe_cswap(tmp_c->T2, tmp_a    , high ^ 1); // when high == 0: T2 takes its negation...
+	fe_cswap(tmp_c->Yp, tmp_c->Ym, high ^ 1); // ...and Yp/Ym trade places
+	ge_madd(p, p, tmp_c, tmp_a, tmp_b); // accumulate into p; tmp_a and tmp_b are handed over as scratch
+}
+
+// p = [scalar]B, where B is the base point
+static void ge_scalarmult_base(ge *p, const u8 scalar[32]) // scalar is not modified; all secret-derived locals are wiped before returning
+{
+	// twin 4-bits signed combs, from Mike Hamburg's
+	// Fast and compact elliptic-curve cryptography (2012)
+	// 1 / 2 modulo L
+	static const u8 half_mod_L[32] = {
+		247,233,122,46,141,49,9,44,107,206,123,81,239,124,111,10,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,
+	};
+	// (2^256 - 1) / 2 modulo L
+	static const u8 half_ones[32] = {
+		142,74,204,70,186,24,118,107,184,231,190,57,250,173,119,99,
+		255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,7,
+	};
+
+	// All bits set form: 1 means 1, 0 means -1
+	u8 s_scalar[32];
+	crypto_eddsa_mul_add(s_scalar, scalar, half_mod_L, half_ones); // s_scalar = scalar * half_mod_L + half_ones (presumably reduced modulo L -- confirm in crypto_eddsa_mul_add)
+
+	// Double and add ladder
+	fe tmp_a, tmp_b;  // temporaries for addition
+	ge_precomp tmp_c; // temporary for comb lookup
+	ge tmp_d;         // temporary for doubling
+	fe_1(tmp_c.Yp); // give tmp_c a defined starting value before lookup_add overwrites it conditionally
+	fe_1(tmp_c.Ym);
+	fe_0(tmp_c.T2);
+
+	// Save a double on the first iteration
+	ge_zero(p);
+	lookup_add(p, &tmp_c, tmp_a, tmp_b, b_comb_low , s_scalar, 31); // low comb: bits 31, 63, 95, 127
+	lookup_add(p, &tmp_c, tmp_a, tmp_b, b_comb_high, s_scalar, 31+128); // high comb: bits 159, 191, 223, 255
+	// Regular double & add for the rest
+	for (int i = 30; i >= 0; i--) { // remaining 31 comb positions, one doubling each
+		ge_double(p, p, &tmp_d);
+		lookup_add(p, &tmp_c, tmp_a, tmp_b, b_comb_low , s_scalar, i);
+		lookup_add(p, &tmp_c, tmp_a, tmp_b, b_comb_high, s_scalar, i+128);
+	}
+	// Note: we could save one addition at the end if we assumed the
+	// scalar fit in 252 bits.  Which it does in practice if it is
+	// selected at random.  However, non-random, non-hashed scalars
+	// *can* overflow 252 bits in practice.  Better account for that
+	// than leaving that kind of subtle corner case.
+
+	WIPE_BUFFER(tmp_a);  WIPE_CTX(&tmp_d);
+	WIPE_BUFFER(tmp_b);  WIPE_CTX(&tmp_c);
+	WIPE_BUFFER(s_scalar);
+}
+
+// Writes to `point` the 32-byte encoding of [scalar]B, B being the
+// base point.  The intermediate group element is wiped before returning.
+void crypto_eddsa_scalarbase(u8 point[32], const u8 scalar[32])
+{
+	ge product; // [scalar]B, possibly derived from a secret scalar
+	ge_scalarmult_base(&product, scalar);
+	ge_tobytes(point, &product);
+	WIPE_CTX(&product);
+}
+
+void crypto_eddsa_key_pair(u8 secret_key[64], u8 public_key[32], u8 seed[32]) // derives secret_key (seed || public key) and public_key from seed; seed is wiped
+{
+	// To allow overlaps, observable writes happen in this order:
+	// 1. seed
+	// 2. secret_key
+	// 3. public_key
+	u8 a[64]; // scratch: first a copy of the seed, then its 64-byte hash
+	COPY(a, seed, 32); // take a private copy so the caller's seed can be erased right away
+	crypto_wipe(seed, 32);
+	COPY(secret_key, a, 32); // secret_key[0..31] = seed
+	crypto_blake2b(a, 64, a, 32); // a = 64-byte BLAKE2b hash of the seed (computed in place)
+	crypto_eddsa_trim_scalar(a, a); // trims the first 32 bytes of a in place to obtain the secret scalar
+	crypto_eddsa_scalarbase(secret_key + 32, a); // secret_key[32..63] = encoded [a]B, i.e. the public key
+	COPY(public_key, secret_key + 32, 32);
+	WIPE_BUFFER(a);
+}
+
+// h = BLAKE2b-512(a || b || c), reduced with crypto_eddsa_reduce().
+//
+// Any of the three inputs may be empty (size 0).  The unreduced digest
+// is wiped before returning: crypto_eddsa_sign() uses this helper to
+// derive its secret nonce from the secret prefix, so the 64-byte digest
+// is as sensitive as the nonce itself and must not linger on the stack.
+static void hash_reduce(u8 h[32],
+                        const u8 *a, size_t a_size,
+                        const u8 *b, size_t b_size,
+                        const u8 *c, size_t c_size)
+{
+	u8 hash[64]; // unreduced digest (secret whenever an input is secret)
+	crypto_blake2b_ctx ctx;
+	crypto_blake2b_init  (&ctx, 64);
+	crypto_blake2b_update(&ctx, a, a_size);
+	crypto_blake2b_update(&ctx, b, b_size);
+	crypto_blake2b_update(&ctx, c, c_size);
+	crypto_blake2b_final (&ctx, hash);
+	crypto_eddsa_reduce(h, hash);
+	WIPE_BUFFER(hash);
+}
+
+// Digital signature of a message with a secret key.
+//
+// The secret key comprises two parts:
+// - The seed that generates the key (secret_key[ 0..31])
+// - The public key                  (secret_key[32..63])
+//
+// The seed and the public key are bundled together to make sure users
+// don't use mismatched seeds and public keys, which would instantly
+// leak the secret scalar and allow forgeries (allowing this to happen
+// has resulted in critical vulnerabilities in the wild).
+//
+// The seed is hashed to derive the secret scalar and a secret prefix.
+// The sole purpose of the prefix is to generate a secret random nonce.
+// The properties of that nonce must be as follows:
+// - Unique: we need a different one for each message.
+// - Secret: third parties must not be able to predict it.
+// - Random: any detectable bias would break all security.
+//
+// There are two ways to achieve these properties.  The obvious one is
+// to simply generate a random number.  Here that would be a parameter
+// (Monocypher doesn't have an RNG).  It works, but then users may reuse
+// the nonce by accident, which _also_ leaks the secret scalar and
+// allows forgeries.  This has happened in the wild too.
+//
+// This is no good, so instead we generate that nonce deterministically
+// by reducing modulo L a hash of the secret prefix and the message.
+// The secret prefix makes the nonce unpredictable, the message makes it
+// unique, and the hash/reduce removes all bias.
+//
+// The cost of that safety is hashing the message twice.  If that cost
+// is unacceptable, there are two alternatives:
+//
+// - Signing a hash of the message instead of the message itself.  This
+//   is fine as long as the hash is collision resistant. It is not
+//   compatible with existing "pure" signatures, but at least it's safe.
+//
+// - Using a random nonce.  Please exercise **EXTREME CAUTION** if you
+//   ever do that.  It is absolutely **critical** that the nonce is
+//   really an unbiased random number between 0 and L-1, never reused,
+//   and wiped immediately.
+//
+//   To lower the likelihood of complete catastrophe if the RNG is
+//   either flawed or misused, you can hash the RNG output together with
+//   the secret prefix and the beginning of the message, and use the
+//   reduction of that hash instead of the RNG output itself.  It's not
+//   foolproof (you'd need to hash the whole message) but it helps.
+//
+// Signing a message involves the following operations:
+//
+//   scalar, prefix = HASH(secret_key)
+//   r              = HASH(prefix || message) % L
+//   R              = [r]B
+//   h              = HASH(R || public_key || message) % L
+//   S              = ((h * a) + r) % L
+//   signature      = R || S
+void crypto_eddsa_sign(u8 signature [64], const u8 secret_key[64], // writes R || S to signature; secret_key = seed || public key
+                       const u8 *message, size_t message_size)
+{
+	u8 a[64];  // secret scalar and prefix
+	u8 r[32];  // secret deterministic "random" nonce
+	u8 h[32];  // publicly verifiable hash of the message (not wiped)
+	u8 R[32];  // first half of the signature (allows overlapping inputs)
+
+	crypto_blake2b(a, 64, secret_key, 32); // a = 64-byte hash of the seed
+	crypto_eddsa_trim_scalar(a, a); // a[0..31] becomes the secret scalar, a[32..63] stays the prefix
+	hash_reduce(r, a + 32, 32, message, message_size, 0, 0); // r = HASH(prefix || message) % L
+	crypto_eddsa_scalarbase(R, r); // R = [r]B
+	hash_reduce(h, R, 32, secret_key + 32, 32, message, message_size); // h = HASH(R || public_key || message) % L
+	COPY(signature, R, 32); // written only now, after the last read of message (they may overlap)
+	crypto_eddsa_mul_add(signature + 32, h, a, r); // S = ((h * a) + r) % L
+
+	WIPE_BUFFER(a);
+	WIPE_BUFFER(r);
+}
+
+// To check the signature R, S of the message M with the public key A,
+// there are 3 steps:
+//
+//   compute h = HASH(R || A || message) % L
+//   check that A is on the curve.
+//   check that R == [s]B - [h]A
+//
+// The last two steps are done in crypto_eddsa_check_equation()
+int crypto_eddsa_check(const u8  signature[64], const u8 public_key[32],
+                       const u8 *message, size_t message_size)
+{
+	// challenge = HASH(R || A || message) % L, with R = signature[0..31].
+	// It only depends on public data, so it is not wiped.
+	u8 challenge[32];
+	hash_reduce(challenge, signature, 32, public_key, 32,
+	            message, message_size);
+	// The curve membership and equation checks, and therefore the
+	// return value, are those of crypto_eddsa_check_equation().
+	return crypto_eddsa_check_equation(signature, public_key, challenge);
+}
+
+/////////////////////////
+/// EdDSA <--> X25519 ///
+/////////////////////////
+void crypto_eddsa_to_x25519(u8 x25519[32], const u8 eddsa[32]) // x25519 = u coordinate computed from the Edwards y coordinate in eddsa
+{
+	// (u, v) = ((1+y)/(1-y), sqrt(-486664)*u/x)
+	// Only converting y to u, the sign of x is ignored.
+	fe t1, t2;
+	fe_frombytes(t2, eddsa); // t2 = y
+	fe_add(t1, fe_one, t2); // t1 = 1 + y
+	fe_sub(t2, fe_one, t2); // t2 = 1 - y
+	fe_invert(t2, t2); // t2 = 1 / (1 - y)
+	fe_mul(t1, t1, t2); // t1 = u = (1 + y) / (1 - y)
+	fe_tobytes(x25519, t1);
+	WIPE_BUFFER(t1);
+	WIPE_BUFFER(t2);
+}
+
+void crypto_x25519_to_eddsa(u8 eddsa[32], const u8 x25519[32]) // eddsa = Edwards y coordinate computed from the Montgomery u coordinate in x25519
+{
+	// (x, y) = (sqrt(-486664)*u/v, (u-1)/(u+1))
+	// Only converting u to y, x is assumed positive.
+	fe t1, t2;
+	fe_frombytes(t2, x25519); // t2 = u
+	fe_sub(t1, t2, fe_one); // t1 = u - 1
+	fe_add(t2, t2, fe_one); // t2 = u + 1
+	fe_invert(t2, t2); // t2 = 1 / (u + 1)
+	fe_mul(t1, t1, t2); // t1 = y = (u - 1) / (u + 1)
+	fe_tobytes(eddsa, t1);
+	WIPE_BUFFER(t1);
+	WIPE_BUFFER(t2);
+}
+
+/////////////////////////////////////////////
+/// Dirty ephemeral public key generation ///
+/////////////////////////////////////////////
+
+// These functions generate a public key, *without* clearing the
+// cofactor.  Sending that key over the network leaks 3 bits of the
+// private key.  Use only to generate ephemeral keys that will be hidden
+// with crypto_curve_to_hidden().
+//
+// The public key is otherwise compatible with crypto_x25519(), which
+// properly clears the cofactor.
+//
+// Note that the distribution of the resulting public keys is almost
+// uniform.  Flipping the sign of the v coordinate (not provided by this
+// function), covers the entire key space almost perfectly, where
+// "almost" means a 2^-128 bias (undetectable).  This uniformity is
+// needed to ensure the proper randomness of the resulting
+// representatives (once we apply crypto_curve_to_hidden()).
+//
+// Recall that Curve25519 has order C = 2^255 + e, with e < 2^128 (not
+// to be confused with the prime order of the main subgroup, L, which is
+// 8 times less than that).
+//
+// Generating all points would require us to multiply a point of order C
+// (the base point plus any point of order 8) by all scalars from 0 to
+// C-1.  Clamping limits us to scalars between 2^254 and 2^255 - 1. But
+// by negating the resulting point at random, we also cover scalars from
+// -2^255 + 1 to -2^254 (which modulo C is congruent to e+1 to 2^254 + e).
+//
+// In practice:
+// - Scalars from 0         to e + 1     are never generated
+// - Scalars from 2^255     to 2^255 + e are never generated
+// - Scalars from 2^254 + 1 to 2^254 + e are generated twice
+//
+// Since e < 2^128, detecting this bias requires observing over 2^100
+// representatives from a given source (this will never happen), *and*
+// recovering enough of the private key to determine that they do, or do
+// not, belong to the biased set (this practically requires solving
+// discrete logarithm, which is conjecturally intractable).
+//
+// In practice, this means the bias is impossible to detect.
+
+// s + (x*L) % 8*L
+// Guaranteed to fit in 256 bits iff s fits in 255 bits.
+//   L             < 2^253
+//   x%8           < 2^3
+//   L * (x%8)     < 2^255
+//   s             < 2^255
+//   s + L * (x%8) < 2^256
+static void add_xl(u8 s[32], u8 x) // in place: s += (x % 8) * L, with s read and written as eight little-endian 32-bit words
+{
+	u64 mod8  = x & 7; // only the three low bits of x matter
+	u64 carry = 0;
+	FOR (i , 0, 8) {
+		carry = carry + load32_le(s + 4*i) + L[i] * mod8; // word i of s, plus word i of L times mod8, plus incoming carry (64-bit accumulator)
+		store32_le(s + 4*i, (u32)carry); // low 32 bits go back into s
+		carry >>= 32; // high bits carry into the next word; the carry out of the last word is dropped
+	}
+}
+
+// "Small" dirty ephemeral key.
+// Use if you need to shrink the size of the binary, and can afford to
+// slow down by a factor of two (compared to the fast version)
+//
+// This version works by decoupling the cofactor from the main factor.
+//
+// - The trimmed scalar determines the main factor
+// - The clamped bits of the scalar determine the cofactor.
+//
+// Cofactor and main factor are combined into a single scalar, which is
+// then multiplied by a point of order 8*L (unlike the base point, which
+// has prime order).  That "dirty" base point is the addition of the
+// regular base point (9), and a point of order 8.
+void crypto_x25519_dirty_small(u8 public_key[32], const u8 secret_key[32]) // public_key = "dirty" X25519 public key for secret_key (cofactor not cleared)
+{
+	// Base point of order 8*L
+	// Raw scalar multiplication with it does not clear the cofactor,
+	// and the resulting public key will reveal 3 bits of the scalar.
+	//
+	// The low order component of this base point  has been chosen
+	// to yield the same results as crypto_x25519_dirty_fast().
+	static const u8 dirty_base_point[32] = {
+		0xd8, 0x86, 0x1a, 0xa2, 0x78, 0x7a, 0xd9, 0x26,
+		0x8b, 0x74, 0x74, 0xb6, 0x82, 0xe3, 0xbe, 0xc3,
+		0xce, 0x36, 0x9a, 0x1e, 0x5e, 0x31, 0x47, 0xa2,
+		0x6d, 0x37, 0x7c, 0xfd, 0x20, 0xb5, 0xdf, 0x75,
+	};
+	// Work on a trimmed copy of the secret key (secret_key itself is left untouched)
+	u8 scalar[32];
+	crypto_eddsa_trim_scalar(scalar, secret_key);
+
+	// Separate the main factor and the cofactor
+	//
+	// The scalar is trimmed, so its cofactor is cleared.  The three
+	// least significant bits however still have a main factor.  We must
+	// remove it for X25519 compatibility.
+	//
+	//   cofactor = lsb * L            (modulo 8*L)
+	//   combined = scalar + cofactor  (modulo 8*L)
+	add_xl(scalar, secret_key[0]); // add_xl only uses the three low bits of secret_key[0]
+	scalarmult(public_key, scalar, dirty_base_point, 256); // 256: the combined scalar may need all 256 bits (see the bound above add_xl)
+	WIPE_BUFFER(scalar);
+}
+
+// Select low order point
+// We're computing the [cofactor]lop scalar multiplication, where:
+//
+//   cofactor = tweak & 7.
+//   lop      = (lop_x, lop_y)
+//   lop_x    = sqrt((sqrt(d + 1) + 1) / d)
+//   lop_y    = -lop_x * sqrtm1
+//
+// The low order point has order 8. There are 4 such points.  We've
+// chosen the one whose both coordinates are positive (below p/2).
+// The 8 low order points are as follows:
+//
+// [0]lop = ( 0       ,  1    )
+// [1]lop = ( lop_x   ,  lop_y)
+// [2]lop = ( sqrt(-1), -0    )
+// [3]lop = ( lop_x   , -lop_y)
+// [4]lop = (-0       , -1    )
+// [5]lop = (-lop_x   , -lop_y)
+// [6]lop = (-sqrt(-1),  0    )
+// [7]lop = (-lop_x   ,  lop_y)
+//
+// The x coordinate is either 0, sqrt(-1), lop_x, or their opposite.
+// The y coordinate is either 0,      -1 , lop_y, or their opposite.
+// The pattern for both is the same, except for a rotation of 2 (modulo 8)
+//
+// This helper function captures the pattern, and we can use it thus:
+//
+//    select_lop(x, lop_x, sqrtm1, cofactor);
+//    select_lop(y, lop_y, fe_one, cofactor + 2);
+//
+// This is faster than an actual scalar multiplication,
+// and requires less code than naive constant time look up.
+static void select_lop(fe out, const fe x, const fe k, u8 cofactor) // out = 0, k or x, possibly negated, chosen by the three low bits of cofactor
+{
+	fe tmp;
+	fe_0(out); // default when bits 0 and 1 are both clear
+	fe_ccopy(out, k  , (cofactor >> 1) & 1); // bit 1
+	fe_ccopy(out, x  , (cofactor >> 0) & 1); // bit 0 (applied second, so x overrides k when both bits are set)
+	fe_neg  (tmp, out);
+	fe_ccopy(out, tmp, (cofactor >> 2) & 1); // bit 2 (selects the negated value)
+	WIPE_BUFFER(tmp);
+}
+
+// "Fast" dirty ephemeral key
+// We use this one by default.
+//
+// This version works by performing a regular scalar multiplication,
+// then add a low order point.  The scalar multiplication is done in
+// Edwards space for more speed (*2 compared to the "small" version).
+// The cost is a bigger binary for programs that don't also sign messages.
+void crypto_x25519_dirty_fast(u8 public_key[32], const u8 secret_key[32]) // public_key = "dirty" X25519 public key for secret_key (cofactor not cleared)
+{
+	// Compute clean scalar multiplication
+	u8 scalar[32];
+	ge pk;
+	crypto_eddsa_trim_scalar(scalar, secret_key);
+	ge_scalarmult_base(&pk, scalar); // pk = [scalar]B
+
+	// Compute low order point
+	fe t1, t2;
+	select_lop(t1, lop_x, sqrtm1, secret_key[0]); // t1 = x coordinate of the selected low order point (three low bits of secret_key[0])
+	select_lop(t2, lop_y, fe_one, secret_key[0] + 2); // t2 = y coordinate: same pattern, rotated by 2
+	ge_precomp low_order_point;
+	fe_add(low_order_point.Yp, t2, t1); // Yp = y + x
+	fe_sub(low_order_point.Ym, t2, t1); // Ym = y - x
+	fe_mul(low_order_point.T2, t2, t1); // T2 = x * y ...
+	fe_mul(low_order_point.T2, low_order_point.T2, D2); // ... times the constant D2
+
+	// Add low order point to the public key
+	ge_madd(&pk, &pk, &low_order_point, t1, t2); // t1 and t2 are no longer needed and are handed over as scratch
+
+	// Convert to Montgomery u coordinate (we ignore the sign)
+	fe_add(t1, pk.Z, pk.Y); // t1 = Z + Y
+	fe_sub(t2, pk.Z, pk.Y); // t2 = Z - Y
+	fe_invert(t2, t2);
+	fe_mul(t1, t1, t2); // t1 = (Z + Y) / (Z - Y)
+
+	fe_tobytes(public_key, t1);
+
+	WIPE_BUFFER(t1);    WIPE_CTX(&pk);
+	WIPE_BUFFER(t2);    WIPE_CTX(&low_order_point);
+	WIPE_BUFFER(scalar);
+}
+
+///////////////////
+/// Elligator 2 ///
+///////////////////
+static const fe A = {486662}; // the constant A of the Elligator formulas below (as in w^3 + A*w^2 + w), stored as a field element
+
+// Elligator direct map
+//
+// Computes the point corresponding to a representative, encoded in 32
+// bytes (little endian).  Since positive representatives fit in 254
+// bits, the two most significant bits are ignored.
+//
+// From the paper:
+// w = -A / (fe(1) + non_square * r^2)
+// e = chi(w^3 + A*w^2 + w)
+// u = e*w - (fe(1)-e)*(A//2)
+// v = -e * sqrt(u^3 + A*u^2 + u)
+//
+// We ignore v because we don't need it for X25519 (the Montgomery
+// ladder only uses u).
+//
+// Note that e is either 0, 1 or -1
+// if e = 0    u = 0  and v = 0
+// if e = 1    u = w
+// if e = -1   u = -w - A = w * non_square * r^2
+//
+// Let r1 = non_square * r^2
+// Let r2 = 1 + r1
+// Note that r2 cannot be zero, -1/non_square is not a square.
+// We can (tediously) verify that:
+//   w^3 + A*w^2 + w = (A^2*r1 - r2^2) * A / r2^3
+// Therefore:
+//   chi(w^3 + A*w^2 + w) = chi((A^2*r1 - r2^2) * (A / r2^3))
+//   chi(w^3 + A*w^2 + w) = chi((A^2*r1 - r2^2) * (A / r2^3)) * 1
+//   chi(w^3 + A*w^2 + w) = chi((A^2*r1 - r2^2) * (A / r2^3)) * chi(r2^6)
+//   chi(w^3 + A*w^2 + w) = chi((A^2*r1 - r2^2) * (A / r2^3)  *     r2^6)
+//   chi(w^3 + A*w^2 + w) = chi((A^2*r1 - r2^2) *  A * r2^3)
+// Corollary:
+//   e =  1 if (A^2*r1 - r2^2) *  A * r2^3) is a non-zero square
+//   e = -1 if (A^2*r1 - r2^2) *  A * r2^3) is not a square
+//   Note that w^3 + A*w^2 + w (and therefore e) can never be zero:
+//     w^3 + A*w^2 + w = w * (w^2 + A*w + 1)
+//     w^3 + A*w^2 + w = w * (w^2 + A*w + A^2/4 - A^2/4 + 1)
+//     w^3 + A*w^2 + w = w * (w + A/2)^2        - A^2/4 + 1)
+//     which is zero only if:
+//       w = 0                   (impossible)
+//       (w + A/2)^2 = A^2/4 - 1 (impossible, because A^2/4-1 is not a square)
+//
+// Let isr   = invsqrt((A^2*r1 - r2^2) *  A * r2^3)
+//     isr   = sqrt(1        / ((A^2*r1 - r2^2) *  A * r2^3)) if e =  1
+//     isr   = sqrt(sqrt(-1) / ((A^2*r1 - r2^2) *  A * r2^3)) if e = -1
+//
+// if e = 1
+//   let u1 = -A * (A^2*r1 - r2^2) * A * r2^2 * isr^2
+//       u1 = w
+//       u1 = u
+//
+// if e = -1
+//   let ufactor = -non_square * sqrt(-1) * r^2
+//   let vfactor = sqrt(ufactor)
+//   let u2 = -A * (A^2*r1 - r2^2) * A * r2^2 * isr^2 * ufactor
+//       u2 = w * -1 * -non_square * r^2
+//       u2 = w * non_square * r^2
+//       u2 = u
+void crypto_elligator_map(u8 curve[32], const u8 hidden[32]) // curve = u coordinate mapped from the representative in hidden
+{
+	fe r, u, t1, t2, t3;
+	fe_frombytes_mask(r, hidden, 2); // r is encoded in 254 bits.
+	fe_sq(r, r); // r  = r^2
+	fe_add(t1, r, r); // t1 = 2*r^2 (this is r1 from the derivation, which implies non_square = 2)
+	fe_add(u, t1, fe_one); // u  = r2 = 1 + r1
+	fe_sq (t2, u); // t2 = r2^2
+	fe_mul(t3, A2, t1); // t3 = A2 * r1 (A2 is presumably A^2 -- defined elsewhere in the file)
+	fe_sub(t3, t3, t2); // t3 = A2*r1 - r2^2
+	fe_mul(t3, t3, A); // t3 = (A2*r1 - r2^2) * A
+	fe_mul(t1, t2, u); // t1 = r2^3
+	fe_mul(t1, t3, t1); // t1 = (A2*r1 - r2^2) * A * r2^3
+	int is_square = invsqrt(t1, t1); // t1 = isr; is_square tells which case (e = 1 or e = -1) applies
+	fe_mul(u, r, ufactor); // u = r^2 * ufactor (the e = -1 case)
+	fe_ccopy(u, fe_one, is_square); // u = 1 instead when e = 1
+	fe_sq (t1, t1); // t1 = isr^2
+	fe_mul(u, u, A);
+	fe_mul(u, u, t3);
+	fe_mul(u, u, t2);
+	fe_mul(u, u, t1); // u = A * t3 * r2^2 * isr^2 (times r^2 * ufactor when e = -1)
+	fe_neg(u, u); // final sign flip gives u1 or u2 from the derivation above
+	fe_tobytes(curve, u);
+
+	WIPE_BUFFER(t1);  WIPE_BUFFER(r);
+	WIPE_BUFFER(t2);  WIPE_BUFFER(u);
+	WIPE_BUFFER(t3);
+}
+
+// Elligator inverse map
+//
+// Computes the representative of a point, if possible.  If not, it does
+// nothing and returns -1.  Note that the success of the operation
+// depends only on the point (more precisely its u coordinate).  The
+// tweak parameter is used only upon success
+//
+// The tweak should be a random byte.  Beyond that, its contents are an
+// implementation detail. Currently, the tweak comprises:
+// - Bit  0  : sign of the v coordinate (0 if positive, 1 if negative)
+// - Bits 1-5: not used
+// - Bits 6-7: random padding
+//
+// From the paper:
+// Let sq = -non_square * u * (u+A)
+// if sq is not a square, or u = -A, there is no mapping
+// Assuming there is a mapping:
+//    if v is positive: r = sqrt(-u     / (non_square * (u+A)))
+//    if v is negative: r = sqrt(-(u+A) / (non_square * u    ))
+//
+// We compute isr = invsqrt(-non_square * u * (u+A))
+// if it wasn't a square, abort.
+// else, isr = sqrt(-1 / (non_square * u * (u+A))
+//
+// If v is positive, we return isr * u:
+//   isr * u = sqrt(-1 / (non_square * u * (u+A)) * u
+//   isr * u = sqrt(-u / (non_square * (u+A))
+//
+// If v is negative, we return isr * (u+A):
+//   isr * (u+A) = sqrt(-1     / (non_square * u * (u+A)) * (u+A)
+//   isr * (u+A) = sqrt(-(u+A) / (non_square * u)
+int crypto_elligator_rev(u8 hidden[32], const u8 public_key[32], u8 tweak) // returns 0 and writes hidden on success; returns -1 and leaves hidden untouched otherwise
+{
+	fe t1, t2, t3;
+	fe_frombytes(t1, public_key);    // t1 = u
+
+	fe_add(t2, t1, A);               // t2 = u + A
+	fe_mul(t3, t1, t2); // t3 = u * (u+A)
+	fe_mul_small(t3, t3, -2); // t3 = -2 * u * (u+A), i.e. -non_square * u * (u+A) with non_square = 2
+	int is_square = invsqrt(t3, t3); // t3 = sqrt(-1 / non_square * u * (u+A))
+	if (is_square) {
+		// The only variable time bit.  This ultimately reveals how many
+		// tries it took us to find a representable key.
+		// This does not affect security as long as we try keys at random.
+
+		fe_ccopy    (t1, t2, tweak & 1); // multiply by u if v is positive,
+		fe_mul      (t3, t1, t3);        // multiply by u+A otherwise
+		fe_mul_small(t1, t3, 2); // t1 = 2 * t3, used only for the parity test below
+		fe_neg      (t2, t3); // t2 = -t3
+		fe_ccopy    (t3, t2, fe_isodd(t1)); // keep whichever of t3 / -t3 has an even double (presumably the "positive" representative)
+		fe_tobytes(hidden, t3);
+
+		// Pad with two random bits
+		hidden[31] |= tweak & 0xc0;
+	}
+
+	WIPE_BUFFER(t1);
+	WIPE_BUFFER(t2);
+	WIPE_BUFFER(t3);
+	return is_square - 1; // maps a truthy is_square of 1 to 0 (success) and 0 to -1 (no representative)
+}
+
+void crypto_elligator_key_pair(u8 hidden[32], u8 secret_key[32], u8 seed[32]) // derives secret_key and the representative (hidden) of its public key from seed; seed is wiped
+{
+	u8 pk [32]; // public key
+	u8 buf[64]; // seed + representative
+	COPY(buf + 32, seed, 32); // the upper half of buf holds the current seed
+	do {
+		crypto_chacha20_djb(buf, 0, 64, buf+32, zero, 0); // 64 bytes of keystream keyed by the current seed: buf[0..31] = candidate secret key, buf[32..63] = next seed
+		crypto_x25519_dirty_fast(pk, buf); // or the "small" version
+	} while(crypto_elligator_rev(buf+32, pk, buf[32])); // retry until pk has a representative; on success it overwrites buf[32..63]
+	// Note that the return value of crypto_elligator_rev() is
+	// independent from its tweak parameter.
+	// Therefore, buf[32] is not actually reused.  Either we loop one
+	// more time and buf[32] is used for the new seed, or we succeeded,
+	// and buf[32] becomes the tweak parameter.
+
+	crypto_wipe(seed, 32); // the caller's seed is erased
+	COPY(hidden    , buf + 32, 32);
+	COPY(secret_key, buf     , 32);
+	WIPE_BUFFER(buf);
+	WIPE_BUFFER(pk);
+}
+
+///////////////////////
+/// Scalar division ///
+///////////////////////
+
+// Montgomery reduction.
+// Divides x by (2^256), and reduces the result modulo L
+//
+// Precondition:
+//   x < L * 2^256
+// Constants:
+//   r = 2^256                 (makes division by r trivial)
+//   k = (r * (1/r) - 1) // L  (1/r is computed modulo L   )
+// Algorithm:
+//   s = (x * k) % r
+//   t = x + s*L      (t is always a multiple of r)
+//   u = (t/r) % L    (u is always below 2*L, conditional subtraction is enough)
+static void redc(u32 u[8], u32 x[16]) // u = (x / 2^256) modulo L; x is only read
+{
+	static const u32 k[8] = { // the constant k from the description above, as eight 32-bit words
+		0x12547e1b, 0xd2b51da3, 0xfdba84ff, 0xb1a206f2,
+		0xffa36bea, 0x14e75438, 0x6fe91836, 0x9db6c6f2,
+	};
+
+	// s = x * k (modulo 2^256)
+	// This is cheaper than the full multiplication.
+	u32 s[8] = {0};
+	FOR (i, 0, 8) { // only x[0..7] contribute to the low 256 bits of the product
+		u64 carry = 0;
+		FOR (j, 0, 8-i) { // partial products that would land at word 8 or above are skipped
+			carry  += s[i+j] + (u64)x[i] * k[j];
+			s[i+j]  = (u32)carry;
+			carry >>= 32;
+		}
+	}
+	u32 t[16] = {0};
+	multiply(t, s, L); // t = s * L (t starts zeroed; multiply presumably accumulates into its output -- confirm)
+
+	// t = t + x
+	u64 carry = 0;
+	FOR (i, 0, 16) {
+		carry  += (u64)t[i] + x[i];
+		t[i]    = (u32)carry;
+		carry >>= 32;
+	}
+
+	// u = (t / 2^256) % L
+	// Note that t / 2^256 is always below 2*L,
+	// So a constant time conditional subtraction is enough
+	remove_l(u, t+8); // t+8 skips the low 8 words, i.e. divides by 2^256
+
+	WIPE_BUFFER(s);
+	WIPE_BUFFER(t);
+}
+
+void crypto_x25519_inverse(u8 blind_salt [32], const u8 private_key[32], // blind_salt = curve_point multiplied by the inverse (modulo L) of the trimmed private_key
+                           const u8 curve_point[32])
+{
+	static const  u8 Lm2[32] = { // L - 2
+		0xeb, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58,
+		0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+	};
+	// 1 in Montgomery form
+	u32 m_inv [8] = { // running result of the exponentiation below; starts at 1
+		0x8d98951d, 0xd6ec3174, 0x737dcf70, 0xc6ef5bf4,
+		0xfffffffe, 0xffffffff, 0xffffffff, 0x0fffffff,
+	};
+
+	u8 scalar[32];
+	crypto_eddsa_trim_scalar(scalar, private_key);
+
+	// Convert the scalar in Montgomery form
+	// m_scl = scalar * 2^256 (modulo L)
+	u32 m_scl[8];
+	{
+		u32 tmp[16];
+		ZERO(tmp, 8); // low 8 words are zero...
+		load32_le_buf(tmp+8, scalar, 8); // ...and the scalar sits in the high 8 words: tmp = scalar * 2^256
+		mod_l(scalar, tmp); // reduced back into the scalar byte buffer
+		load32_le_buf(m_scl, scalar, 8);
+		WIPE_BUFFER(tmp); // Wipe ASAP to save stack space
+	}
+
+	// Compute the inverse
+	u32 product[16];
+	for (int i = 252; i >= 0; i--) { // square-and-multiply over the bits of L - 2, most significant first
+		ZERO(product, 16);
+		multiply(product, m_inv, m_inv); // square
+		redc(m_inv, product);
+		if (scalar_bit(Lm2, i)) { // branches on a bit of the public constant L - 2, not on the secret
+			ZERO(product, 16);
+			multiply(product, m_inv, m_scl); // multiply by the scalar
+			redc(m_inv, product);
+		}
+	}
+	// Convert the inverse *out* of Montgomery form
+	// scalar = m_inv / 2^256 (modulo L)
+	COPY(product, m_inv, 8);
+	ZERO(product + 8, 8);
+	redc(m_inv, product);
+	store32_le_buf(scalar, m_inv, 8); // the *inverse* of the scalar
+
+	// Clear the cofactor of scalar:
+	//   cleared = scalar * (3*L + 1)      (modulo 8*L)
+	//   cleared = scalar + scalar * 3 * L (modulo 8*L)
+	// Note that (scalar * 3) is reduced modulo 8, so we only need the
+	// first byte.
+	add_xl(scalar, scalar[0] * 3); // add_xl keeps only the three low bits of its second argument
+
+	// Recall that 8*L < 2^256. However it is also very close to
+	// 2^255. If we spanned the ladder over 255 bits, random tests
+	// wouldn't catch the off-by-one error.
+	scalarmult(blind_salt, scalar, curve_point, 256);
+
+	WIPE_BUFFER(scalar);   WIPE_BUFFER(m_scl);
+	WIPE_BUFFER(product);  WIPE_BUFFER(m_inv);
+}
+
+////////////////////////////////
+/// Authenticated encryption ///
+////////////////////////////////
+static void lock_auth(u8 mac[16], const u8  auth_key[32], // mac = Poly1305 tag, under auth_key, over padded ad, padded cipher_text and both lengths
+                      const u8 *ad         , size_t ad_size,
+                      const u8 *cipher_text, size_t text_size)
+{
+	u8 sizes[16]; // Not secret, not wiped
+	store64_le(sizes + 0, ad_size); // both lengths as 64-bit little-endian integers
+	store64_le(sizes + 8, text_size);
+	crypto_poly1305_ctx poly_ctx;           // auto wiped...
+	crypto_poly1305_init  (&poly_ctx, auth_key);
+	crypto_poly1305_update(&poly_ctx, ad         , ad_size);
+	crypto_poly1305_update(&poly_ctx, zero       , align(ad_size, 16)); // zero padding after ad (align() presumably yields the bytes missing to a multiple of 16)
+	crypto_poly1305_update(&poly_ctx, cipher_text, text_size);
+	crypto_poly1305_update(&poly_ctx, zero       , align(text_size, 16)); // zero padding after the cipher text
+	crypto_poly1305_update(&poly_ctx, sizes      , 16);
+	crypto_poly1305_final (&poly_ctx, mac); // ...here
+}
+
+void crypto_aead_init_x(crypto_aead_ctx *ctx, // initialises ctx from a 32-byte key and a 24-byte nonce
+                        u8 const key[32], const u8 nonce[24])
+{
+	crypto_chacha20_h(ctx->key, key, nonce); // session key derived from key and nonce (presumably only the first 16 nonce bytes are used here, since the last 8 are stored below)
+	COPY(ctx->nonce, nonce + 16, 8); // the last 8 nonce bytes become the stream nonce
+	ctx->counter = 0;
+}
+
+void crypto_aead_init_djb(crypto_aead_ctx *ctx, // initialises ctx from a 32-byte key and an 8-byte nonce, both used as is
+                          const u8 key[32], const u8 nonce[8])
+{
+	COPY(ctx->key  , key  , 32);
+	COPY(ctx->nonce, nonce,  8);
+	ctx->counter = 0; // the block counter starts at zero
+}
+
+void crypto_aead_init_ietf(crypto_aead_ctx *ctx, // initialises ctx from a 32-byte key and a 12-byte nonce
+                           const u8 key[32], const u8 nonce[12])
+{
+	COPY(ctx->key  , key      , 32);
+	COPY(ctx->nonce, nonce + 4,  8); // the last 8 nonce bytes become the stream nonce
+	ctx->counter = (u64)load32_le(nonce) << 32; // the first 4 nonce bytes go to the upper half of the 64-bit counter; the lower half starts at 0
+}
+
+void crypto_aead_write(crypto_aead_ctx *ctx, u8 *cipher_text, u8 mac[16], // encrypts plain_text into cipher_text, writes the tag to mac, then replaces ctx->key
+                       const u8 *ad,         size_t ad_size,
+                       const u8 *plain_text, size_t text_size)
+{
+	u8 auth_key[64]; // the last 32 bytes are used for rekeying.
+	crypto_chacha20_djb(auth_key, 0, 64, ctx->key, ctx->nonce, ctx->counter); // 64 bytes of keystream at the current counter: authentication key || next key
+	crypto_chacha20_djb(cipher_text, plain_text, text_size,
+	                    ctx->key, ctx->nonce, ctx->counter + 1); // the payload is encrypted starting one block later
+	lock_auth(mac, auth_key, ad, ad_size, cipher_text, text_size); // tag over ad and the cipher text just produced
+	COPY(ctx->key, auth_key + 32, 32); // rekey: the next message uses a fresh key
+	WIPE_BUFFER(auth_key);
+}
+
+int crypto_aead_read(crypto_aead_ctx *ctx, u8 *plain_text, const u8 mac[16], // returns crypto_verify16()'s result: zero means the tag matched and plain_text was written
+                     const u8 *ad,          size_t ad_size,
+                     const u8 *cipher_text, size_t text_size)
+{
+	u8 auth_key[64]; // the last 32 bytes are used for rekeying.
+	u8 real_mac[16]; // tag recomputed locally, compared against the received one
+	crypto_chacha20_djb(auth_key, 0, 64, ctx->key, ctx->nonce, ctx->counter); // authentication key || next key
+	lock_auth(real_mac, auth_key, ad, ad_size, cipher_text, text_size);
+	int mismatch = crypto_verify16(mac, real_mac); // non-zero when the tags differ
+	if (!mismatch) { // decrypt and rekey only after the tag checks out; on mismatch plain_text and ctx->key are left untouched
+		crypto_chacha20_djb(plain_text, cipher_text, text_size,
+		                    ctx->key, ctx->nonce, ctx->counter + 1);
+		COPY(ctx->key, auth_key + 32, 32);
+	}
+	WIPE_BUFFER(auth_key);
+	WIPE_BUFFER(real_mac);
+	return mismatch;
+}
+
+// One-shot authenticated encryption: sets up a throwaway context with
+// crypto_aead_init_x(), writes a single message with crypto_aead_write(),
+// then wipes the context.
+void crypto_aead_lock(u8 *cipher_text, u8 mac[16], const u8 key[32],
+                      const u8  nonce[24], const u8 *ad, size_t ad_size,
+                      const u8 *plain_text, size_t text_size)
+{
+	crypto_aead_ctx one_shot;
+	crypto_aead_init_x(&one_shot, key, nonce);
+	crypto_aead_write(&one_shot, cipher_text, mac,
+	                  ad, ad_size,
+	                  plain_text, text_size);
+	crypto_wipe(&one_shot, sizeof(one_shot));
+}
+
+// One-shot authenticated decryption: sets up a throwaway context with
+// crypto_aead_init_x(), reads a single message with crypto_aead_read(),
+// wipes the context, and hands back crypto_aead_read()'s return value.
+int crypto_aead_unlock(u8 *plain_text, const u8  mac[16], const u8 key[32],
+                       const u8 nonce[24], const u8 *ad, size_t ad_size,
+                       const u8 *cipher_text, size_t text_size)
+{
+	crypto_aead_ctx one_shot;
+	crypto_aead_init_x(&one_shot, key, nonce);
+	int verdict = crypto_aead_read(&one_shot, plain_text, mac,
+	                               ad, ad_size,
+	                               cipher_text, text_size);
+	crypto_wipe(&one_shot, sizeof(one_shot));
+	return verdict;
+}
+
+#ifdef MONOCYPHER_CPP_NAMESPACE
+}
+#endif
diff --git a/include/extern/external/monocypher.h b/include/extern/external/monocypher.h
new file mode 100644
index 0000000..8f466e3
--- /dev/null
+++ b/include/extern/external/monocypher.h
@@ -0,0 +1,321 @@
+// Monocypher version 4.0.1
+//
+// This file is dual-licensed.  Choose whichever licence you want from
+// the two licences listed below.
+//
+// The first licence is a regular 2-clause BSD licence.  The second licence
+// is the CC-0 from Creative Commons. It is intended to release Monocypher
+// to the public domain.  The BSD licence serves as a fallback option.
+//
+// SPDX-License-Identifier: BSD-2-Clause OR CC0-1.0
+//
+// ------------------------------------------------------------------------
+//
+// Copyright (c) 2017-2019, Loup Vaillant
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the
+//    distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ------------------------------------------------------------------------
+//
+// Written in 2017-2019 by Loup Vaillant
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related neighboring rights to this software to the public domain
+// worldwide.  This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this software.  If not, see
+// <https://creativecommons.org/publicdomain/zero/1.0/>
+
+#ifndef MONOCYPHER_H
+#define MONOCYPHER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef MONOCYPHER_CPP_NAMESPACE
+namespace MONOCYPHER_CPP_NAMESPACE {
+#elif defined(__cplusplus)
+extern "C" {
+#endif
+
+// Constant time comparisons
+// -------------------------
+// Each function compares two buffers of the fixed size given in its name.
+// Return 0 if a and b are equal, -1 otherwise
+int crypto_verify16(const uint8_t a[16], const uint8_t b[16]); // 16 bytes
+int crypto_verify32(const uint8_t a[32], const uint8_t b[32]); // 32 bytes
+int crypto_verify64(const uint8_t a[64], const uint8_t b[64]); // 64 bytes
+
+
+// Erase sensitive data
+// --------------------
+void crypto_wipe(void *secret, size_t size); // size is a byte count
+
+
+// Authenticated encryption (one-shot interface, 24-byte nonce)
+// ------------------------
+void crypto_aead_lock(uint8_t       *cipher_text, // out
+                      uint8_t        mac  [16], // out
+                      const uint8_t  key  [32],
+                      const uint8_t  nonce[24],
+                      const uint8_t *ad,         size_t ad_size,
+                      const uint8_t *plain_text, size_t text_size);
+int crypto_aead_unlock(uint8_t       *plain_text, // out, only if mac matches
+                       const uint8_t  mac  [16],
+                       const uint8_t  key  [32],
+                       const uint8_t  nonce[24],
+                       const uint8_t *ad,          size_t ad_size,
+                       const uint8_t *cipher_text, size_t text_size); // 0 ok, -1 bad mac
+
+// Authenticated stream
+// --------------------
+typedef struct {
+	uint64_t counter; // handed to crypto_chacha20_djb() as its ctr argument
+	uint8_t  key[32]; // replaced after each write and each successful read
+	uint8_t  nonce[8];
+} crypto_aead_ctx;
+// Initialise ctx from a key and a nonce; the variants differ in nonce size.
+void crypto_aead_init_x(crypto_aead_ctx *ctx,
+                        const uint8_t key[32], const uint8_t nonce[24]);
+void crypto_aead_init_djb(crypto_aead_ctx *ctx,
+                          const uint8_t key[32], const uint8_t nonce[8]);
+void crypto_aead_init_ietf(crypto_aead_ctx *ctx,
+                           const uint8_t key[32], const uint8_t nonce[12]);
+// read() returns 0 if mac matches and -1 otherwise; it decrypts only on a match.
+void crypto_aead_write(crypto_aead_ctx *ctx,
+                       uint8_t         *cipher_text,
+                       uint8_t          mac[16],
+                       const uint8_t   *ad        , size_t ad_size,
+                       const uint8_t   *plain_text, size_t text_size);
+int crypto_aead_read(crypto_aead_ctx *ctx,
+                     uint8_t         *plain_text,
+                     const uint8_t    mac[16],
+                     const uint8_t   *ad        , size_t ad_size,
+                     const uint8_t   *cipher_text, size_t text_size);
+
+
+// General purpose hash (BLAKE2b)
+// ------------------------------
+// NOTE(review): accepted ranges of hash_size and key_size are not stated here.
+// Direct interface: one call per message
+void crypto_blake2b(uint8_t *hash,          size_t hash_size,
+                    const uint8_t *message, size_t message_size);
+// Same as above, with an additional key of key_size bytes.
+void crypto_blake2b_keyed(uint8_t *hash,          size_t hash_size,
+                          const uint8_t *key,     size_t key_size,
+                          const uint8_t *message, size_t message_size);
+
+// Incremental interface (presumably: init, any number of updates, then final)
+typedef struct {
+	// Do not rely on the size or contents of this type,
+	// for they may change without notice.
+	uint64_t hash[8];
+	uint64_t input_offset[2];
+	uint64_t input[16];
+	size_t   input_idx;
+	size_t   hash_size;
+} crypto_blake2b_ctx;
+// hash_size is supplied at init time; final() only takes the output buffer.
+void crypto_blake2b_init(crypto_blake2b_ctx *ctx, size_t hash_size);
+void crypto_blake2b_keyed_init(crypto_blake2b_ctx *ctx, size_t hash_size,
+                               const uint8_t *key, size_t key_size);
+void crypto_blake2b_update(crypto_blake2b_ctx *ctx,
+                           const uint8_t *message, size_t message_size);
+void crypto_blake2b_final(crypto_blake2b_ctx *ctx, uint8_t *hash);
+
+
+// Password key derivation (Argon2)
+// --------------------------------
+#define CRYPTO_ARGON2_D  0
+#define CRYPTO_ARGON2_I  1
+#define CRYPTO_ARGON2_ID 2
+// The three structs below are passed by value to crypto_argon2().
+typedef struct {
+	uint32_t algorithm;  // Argon2d, Argon2i, Argon2id
+	uint32_t nb_blocks;  // memory hardness, >= 8 * nb_lanes
+	uint32_t nb_passes;  // CPU hardness, >= 1 (>= 3 recommended for Argon2i)
+	uint32_t nb_lanes;   // parallelism level (single threaded anyway)
+} crypto_argon2_config;
+
+typedef struct {
+	const uint8_t *pass;
+	const uint8_t *salt;
+	uint32_t pass_size;
+	uint32_t salt_size;  // 16 bytes recommended
+} crypto_argon2_inputs;
+
+typedef struct {
+	const uint8_t *key; // may be NULL if no key
+	const uint8_t *ad;  // may be NULL if no additional data
+	uint32_t key_size;  // 0 if no key (32 bytes recommended otherwise)
+	uint32_t ad_size;   // 0 if no additional data
+} crypto_argon2_extras;
+// Ready-made extras value; presumably "no key, no ad" (defined in the .c file).
+extern const crypto_argon2_extras crypto_argon2_no_extras;
+// NOTE(review): required work_area size is not stated here; confirm in manual.
+void crypto_argon2(uint8_t *hash, uint32_t hash_size, void *work_area,
+                   crypto_argon2_config config,
+                   crypto_argon2_inputs inputs,
+                   crypto_argon2_extras extras);
+
+
+// Key exchange (X-25519)
+// ----------------------
+// Every key, secret and point in this section is a 32-byte buffer.
+// Shared secrets are not quite random.
+// Hash them to derive an actual shared key.
+void crypto_x25519_public_key(uint8_t       public_key[32],
+                              const uint8_t secret_key[32]);
+void crypto_x25519(uint8_t       raw_shared_secret[32],
+                   const uint8_t your_secret_key  [32],
+                   const uint8_t their_public_key [32]);
+
+// Conversion to EdDSA
+void crypto_x25519_to_eddsa(uint8_t eddsa[32], const uint8_t x25519[32]);
+
+// scalar "division"
+// Used for OPRF.  Be aware that exponential blinding is less secure
+// than Diffie-Hellman key exchange.
+void crypto_x25519_inverse(uint8_t       blind_salt [32],
+                           const uint8_t private_key[32],
+                           const uint8_t curve_point[32]);
+
+// "Dirty" versions of crypto_x25519_public_key().
+// Use with crypto_elligator_rev().
+// Leaks 3 bits of the private key.
+void crypto_x25519_dirty_small(uint8_t pk[32], const uint8_t sk[32]);
+void crypto_x25519_dirty_fast (uint8_t pk[32], const uint8_t sk[32]);
+
+
+// Signatures
+// ----------
+// NOTE(review): return convention of the int functions is not stated here.
+// EdDSA with curve25519 + BLAKE2b
+void crypto_eddsa_key_pair(uint8_t secret_key[64],
+                           uint8_t public_key[32],
+                           uint8_t seed[32]); // seed is not const: may be modified
+void crypto_eddsa_sign(uint8_t        signature [64],
+                       const uint8_t  secret_key[64],
+                       const uint8_t *message, size_t message_size);
+int crypto_eddsa_check(const uint8_t  signature [64],
+                       const uint8_t  public_key[32],
+                       const uint8_t *message, size_t message_size);
+
+// Conversion to X25519
+void crypto_eddsa_to_x25519(uint8_t x25519[32], const uint8_t eddsa[32]);
+
+// EdDSA building blocks
+void crypto_eddsa_trim_scalar(uint8_t out[32], const uint8_t in[32]);
+void crypto_eddsa_reduce(uint8_t reduced[32], const uint8_t expanded[64]);
+void crypto_eddsa_mul_add(uint8_t r[32],
+                          const uint8_t a[32],
+                          const uint8_t b[32],
+                          const uint8_t c[32]);
+void crypto_eddsa_scalarbase(uint8_t point[32], const uint8_t scalar[32]);
+int crypto_eddsa_check_equation(const uint8_t signature[64],
+                                const uint8_t public_key[32],
+                                const uint8_t h_ram[32]);
+
+
+// Chacha20
+// --------
+
+// Specialised hash.
+// Used to hash X25519 shared secrets.
+void crypto_chacha20_h(uint8_t       out[32],
+                       const uint8_t key[32],
+                       const uint8_t in [16]);
+// The three variants below differ in nonce size and in the width of ctr.
+// Unauthenticated stream cipher.
+// Don't forget to add authentication.
+uint64_t crypto_chacha20_djb(uint8_t       *cipher_text,
+                             const uint8_t *plain_text, // the AEAD code passes 0 here
+                             size_t         text_size,
+                             const uint8_t  key[32],
+                             const uint8_t  nonce[8],
+                             uint64_t       ctr); // NOTE(review): return value undocumented
+uint32_t crypto_chacha20_ietf(uint8_t       *cipher_text,
+                              const uint8_t *plain_text,
+                              size_t         text_size,
+                              const uint8_t  key[32],
+                              const uint8_t  nonce[12],
+                              uint32_t       ctr);
+uint64_t crypto_chacha20_x(uint8_t       *cipher_text,
+                           const uint8_t *plain_text,
+                           size_t         text_size,
+                           const uint8_t  key[32],
+                           const uint8_t  nonce[24],
+                           uint64_t       ctr);
+
+
+// Poly 1305
+// ---------
+
+// This is a *one time* authenticator.
+// Disclosing the mac reveals the key.
+// See crypto_aead_lock() on how to use it properly.
+
+// Direct interface
+void crypto_poly1305(uint8_t        mac[16],
+                     const uint8_t *message, size_t message_size,
+                     const uint8_t  key[32]);
+
+// Incremental interface (presumably: init, any number of updates, then final)
+typedef struct {
+	// Do not rely on the size or contents of this type,
+	// for they may change without notice.
+	uint8_t  c[16];  // chunk of the message
+	size_t   c_idx;  // How many bytes are there in the chunk.
+	uint32_t r  [4]; // constant multiplier (from the secret key)
+	uint32_t pad[4]; // random number added at the end (from the secret key)
+	uint32_t h  [5]; // accumulated hash
+} crypto_poly1305_ctx;
+// The key is supplied at init time; final() writes the 16-byte mac.
+void crypto_poly1305_init  (crypto_poly1305_ctx *ctx, const uint8_t key[32]);
+void crypto_poly1305_update(crypto_poly1305_ctx *ctx,
+                            const uint8_t *message, size_t message_size);
+void crypto_poly1305_final (crypto_poly1305_ctx *ctx, uint8_t mac[16]);
+
+
+// Elligator 2
+// -----------
+// NOTE(review): meaning of crypto_elligator_rev()'s int result is not stated here.
+// Elligator mappings proper
+void crypto_elligator_map(uint8_t curve [32], const uint8_t hidden[32]);
+int  crypto_elligator_rev(uint8_t hidden[32], const uint8_t curve [32],
+                          uint8_t tweak);
+
+// Easy to use key pair generation
+void crypto_elligator_key_pair(uint8_t hidden[32], uint8_t secret_key[32],
+                               uint8_t seed[32]); // seed is not const: may be modified
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MONOCYPHER_H
diff --git a/include/extern/external/qoi.h b/include/extern/external/qoi.h
new file mode 100644
index 0000000..988f9ed
--- /dev/null
+++ b/include/extern/external/qoi.h
@@ -0,0 +1,671 @@
+/*
+
+QOI - The "Quite OK Image" format for fast, lossless image compression
+
+Dominic Szablewski - https://phoboslab.org
+
+
+-- LICENSE: The MIT License(MIT)
+
+Copyright(c) 2021 Dominic Szablewski
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files(the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions :
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+
+-- About
+
+QOI encodes and decodes images in a lossless format. Compared to stb_image and
+stb_image_write QOI offers 20x-50x faster encoding, 3x-4x faster decoding and
+20% better compression.
+
+
+-- Synopsis
+
+// Define `QOI_IMPLEMENTATION` in *one* C/C++ file before including this
+// library to create the implementation.
+
+#define QOI_IMPLEMENTATION
+#include "qoi.h"
+
+// Encode and store an RGBA buffer to the file system. The qoi_desc describes
+// the input pixel data.
+qoi_write("image_new.qoi", rgba_pixels, &(qoi_desc){
+	.width = 1920,
+	.height = 1080,
+	.channels = 4,
+	.colorspace = QOI_SRGB
+});
+
+// Load and decode a QOI image from the file system into a 32bpp RGBA buffer.
+// The qoi_desc struct will be filled with the width, height, number of channels
+// and colorspace read from the file header.
+qoi_desc desc;
+void *rgba_pixels = qoi_read("image.qoi", &desc, 4);
+
+
+
+-- Documentation
+
+This library provides the following functions:
+- qoi_read    -- read and decode a QOI file
+- qoi_decode  -- decode the raw bytes of a QOI image from memory
+- qoi_write   -- encode and write a QOI file
+- qoi_encode  -- encode an rgba buffer into a QOI image in memory
+
+See the function declaration below for the signature and more information.
+
+If you don't want/need the qoi_read and qoi_write functions, you can define
+QOI_NO_STDIO before including this library.
+
+This library uses malloc() and free(). To supply your own malloc implementation
+you can define QOI_MALLOC and QOI_FREE before including this library.
+
+This library uses memset() to zero-initialize the index. To supply your own
+implementation you can define QOI_ZEROARR before including this library.
+
+
+-- Data Format
+
+A QOI file has a 14 byte header, followed by any number of data "chunks" and an
+8-byte end marker.
+
+struct qoi_header_t {
+	char     magic[4];   // magic bytes "qoif"
+	uint32_t width;      // image width in pixels (BE)
+	uint32_t height;     // image height in pixels (BE)
+	uint8_t  channels;   // 3 = RGB, 4 = RGBA
+	uint8_t  colorspace; // 0 = sRGB with linear alpha, 1 = all channels linear
+};
+
+Images are encoded row by row, left to right, top to bottom. The decoder and
+encoder start with {r: 0, g: 0, b: 0, a: 255} as the previous pixel value. An
+image is complete when all pixels specified by width * height have been covered.
+
+Pixels are encoded as
+ - a run of the previous pixel
+ - an index into an array of previously seen pixels
+ - a difference to the previous pixel value in r,g,b
+ - full r,g,b or r,g,b,a values
+
+The color channels are assumed to not be premultiplied with the alpha channel
+("un-premultiplied alpha").
+
+A running array[64] (zero-initialized) of previously seen pixel values is
+maintained by the encoder and decoder. Each pixel that is seen by the encoder
+and decoder is put into this array at the position formed by a hash function of
+the color value. In the encoder, if the pixel value at the index matches the
+current pixel, this index position is written to the stream as QOI_OP_INDEX.
+The hash function for the index is:
+
+	index_position = (r * 3 + g * 5 + b * 7 + a * 11) % 64
+
+Each chunk starts with a 2- or 8-bit tag, followed by a number of data bits. The
+bit length of chunks is divisible by 8 - i.e. all chunks are byte aligned. All
+values encoded in these data bits have the most significant bit on the left.
+
+The 8-bit tags have precedence over the 2-bit tags. A decoder must check for the
+presence of an 8-bit tag first.
+
+The byte stream's end is marked with 7 0x00 bytes followed by a single 0x01 byte.
+
+
+The possible chunks are:
+
+
+.- QOI_OP_INDEX ----------.
+|         Byte[0]         |
+|  7  6  5  4  3  2  1  0 |
+|-------+-----------------|
+|  0  0 |     index       |
+`-------------------------`
+2-bit tag b00
+6-bit index into the color index array: 0..63
+
+A valid encoder must not issue 2 or more consecutive QOI_OP_INDEX chunks to the
+same index. QOI_OP_RUN should be used instead.
+
+
+.- QOI_OP_DIFF -----------.
+|         Byte[0]         |
+|  7  6  5  4  3  2  1  0 |
+|-------+-----+-----+-----|
+|  0  1 |  dr |  dg |  db |
+`-------------------------`
+2-bit tag b01
+2-bit   red channel difference from the previous pixel between -2..1
+2-bit green channel difference from the previous pixel between -2..1
+2-bit  blue channel difference from the previous pixel between -2..1
+
+The differences to the current channel values use a wraparound operation,
+so "1 - 2" will result in 255, while "255 + 1" will result in 0.
+
+Values are stored as unsigned integers with a bias of 2. E.g. -2 is stored as
+0 (b00). 1 is stored as 3 (b11).
+
+The alpha value remains unchanged from the previous pixel.
+
+
+.- QOI_OP_LUMA -------------------------------------.
+|         Byte[0]         |         Byte[1]         |
+|  7  6  5  4  3  2  1  0 |  7  6  5  4  3  2  1  0 |
+|-------+-----------------+-------------+-----------|
+|  1  0 |  green diff     |   dr - dg   |  db - dg  |
+`---------------------------------------------------`
+2-bit tag b10
+6-bit green channel difference from the previous pixel -32..31
+4-bit   red channel difference minus green channel difference -8..7
+4-bit  blue channel difference minus green channel difference -8..7
+
+The green channel is used to indicate the general direction of change and is
+encoded in 6 bits. The red and blue channels (dr and db) base their diffs off
+of the green channel difference and are encoded in 4 bits. I.e.:
+	dr_dg = (cur_px.r - prev_px.r) - (cur_px.g - prev_px.g)
+	db_dg = (cur_px.b - prev_px.b) - (cur_px.g - prev_px.g)
+
+The differences to the current channel values use a wraparound operation,
+so "10 - 13" will result in 253, while "250 + 7" will result in 1.
+
+Values are stored as unsigned integers with a bias of 32 for the green channel
+and a bias of 8 for the red and blue channel.
+
+The alpha value remains unchanged from the previous pixel.
+
+
+.- QOI_OP_RUN ------------.
+|         Byte[0]         |
+|  7  6  5  4  3  2  1  0 |
+|-------+-----------------|
+|  1  1 |       run       |
+`-------------------------`
+2-bit tag b11
+6-bit run-length repeating the previous pixel: 1..62
+
+The run-length is stored with a bias of -1. Note that the run-lengths 63 and 64
+(b111110 and b111111) are illegal as they are occupied by the QOI_OP_RGB and
+QOI_OP_RGBA tags.
+
+
+.- QOI_OP_RGB ------------------------------------------.
+|         Byte[0]         | Byte[1] | Byte[2] | Byte[3] |
+|  7  6  5  4  3  2  1  0 | 7 .. 0  | 7 .. 0  | 7 .. 0  |
+|-------------------------+---------+---------+---------|
+|  1  1  1  1  1  1  1  0 |   red   |  green  |  blue   |
+`-------------------------------------------------------`
+8-bit tag b11111110
+8-bit   red channel value
+8-bit green channel value
+8-bit  blue channel value
+
+The alpha value remains unchanged from the previous pixel.
+
+
+.- QOI_OP_RGBA ---------------------------------------------------.
+|         Byte[0]         | Byte[1] | Byte[2] | Byte[3] | Byte[4] |
+|  7  6  5  4  3  2  1  0 | 7 .. 0  | 7 .. 0  | 7 .. 0  | 7 .. 0  |
+|-------------------------+---------+---------+---------+---------|
+|  1  1  1  1  1  1  1  1 |   red   |  green  |  blue   |  alpha  |
+`-----------------------------------------------------------------`
+8-bit tag b11111111
+8-bit   red channel value
+8-bit green channel value
+8-bit  blue channel value
+8-bit alpha channel value
+
+*/
+
+
+/* -----------------------------------------------------------------------------
+Header - Public functions */
+
+#ifndef QOI_H
+#define QOI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* A pointer to a qoi_desc struct has to be supplied to all of qoi's functions.
+It describes either the input format (for qoi_write and qoi_encode), or is
+filled with the description read from the file header (for qoi_read and
+qoi_decode).
+
+The colorspace in this qoi_desc is an enum where
+	0 = sRGB, i.e. gamma scaled RGB channels and a linear alpha channel
+	1 = all channels are linear
+You may use the constants QOI_SRGB or QOI_LINEAR. The colorspace is purely
+informative. It will be saved to the file header, but does not affect
+how chunks are en-/decoded. */
+
+#define QOI_SRGB   0 /* gamma scaled RGB channels, linear alpha */
+#define QOI_LINEAR 1 /* all channels linear */
+/* Image description: input to the encoder, filled in by the decoder. */
+typedef struct {
+	unsigned int width; /* in pixels; 0 is rejected */
+	unsigned int height; /* in pixels; 0 is rejected */
+	unsigned char channels; /* 3 = RGB, 4 = RGBA; anything else is rejected */
+	unsigned char colorspace; /* QOI_SRGB or QOI_LINEAR; values above 1 are rejected */
+} qoi_desc;
+
+#ifndef QOI_NO_STDIO
+
+/* Encode raw RGB or RGBA pixels into a QOI image and write it to the file
+system. The qoi_desc struct must be filled with the image width, height,
+number of channels (3 = RGB, 4 = RGBA) and the colorspace.
+`data` must hold width * height * channels bytes of tightly packed pixels.
+The function returns 0 on failure (invalid parameters, or fopen or malloc
+failed) or the number of bytes written on success. */
+
+int qoi_write(const char *filename, const void *data, const qoi_desc *desc);
+
+
+/* Read and decode a QOI image from the file system. If channels is 0, the
+number of channels from the file header is used. If channels is 3 or 4 the
+output format will be forced into this number of channels.
+desc->channels always reports the file's own value, not the forced one.
+The function either returns NULL on failure (invalid data, or malloc or fopen
+failed) or a pointer to the decoded pixels. On success, the qoi_desc struct
+will be filled with the description from the file header.
+
+The returned pixel data should be free()d after use. */
+
+void *qoi_read(const char *filename, qoi_desc *desc, int channels);
+
+#endif /* QOI_NO_STDIO */
+
+
+/* Encode raw RGB or RGBA pixels into a QOI image in memory.
+`data` must hold width * height * channels bytes of tightly packed pixels.
+The function either returns NULL on failure (invalid parameters or malloc
+failed) or a pointer to the encoded data on success. On success the out_len
+is set to the size in bytes of the encoded data.
+
+The returned qoi data should be free()d after use. */
+
+void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len);
+
+
+/* Decode a QOI image from memory. `size` is the length of `data` in bytes.
+If channels is 0 the file's channel count is used; 3 or 4 forces that output.
+The function either returns NULL on failure (invalid parameters, an invalid
+header or malloc failed) or a pointer to the decoded pixels. On success, the
+qoi_desc struct is filled with the description from the file header.
+
+The returned pixel data should be free()d after use. */
+
+void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QOI_H */
+
+
+/* -----------------------------------------------------------------------------
+Implementation */
+
+#ifdef QOI_IMPLEMENTATION
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef QOI_MALLOC /* an override must define QOI_MALLOC and QOI_FREE together */
+	#define QOI_MALLOC(sz) malloc(sz)
+	#define QOI_FREE(p)    free(p)
+#endif
+#ifndef QOI_ZEROARR /* the argument must be an array object: sizeof(a) is used */
+	#define QOI_ZEROARR(a) memset((a),0,sizeof(a))
+#endif
+
+#define QOI_OP_INDEX  0x00 /* 00xxxxxx */
+#define QOI_OP_DIFF   0x40 /* 01xxxxxx */
+#define QOI_OP_LUMA   0x80 /* 10xxxxxx */
+#define QOI_OP_RUN    0xc0 /* 11xxxxxx */
+#define QOI_OP_RGB    0xfe /* 11111110 */
+#define QOI_OP_RGBA   0xff /* 11111111 */
+/* Mask that isolates the 2-bit tag in the top bits of a chunk's first byte. */
+#define QOI_MASK_2    0xc0 /* 11000000 */
+/* Unreduced colour hash; callers apply "% 64" to obtain the index slot. */
+#define QOI_COLOR_HASH(C) (C.rgba.r*3 + C.rgba.g*5 + C.rgba.b*7 + C.rgba.a*11)
+#define QOI_MAGIC \
+	(((unsigned int)'q') << 24 | ((unsigned int)'o') << 16 | \
+	 ((unsigned int)'i') <<  8 | ((unsigned int)'f'))
+#define QOI_HEADER_SIZE 14
+
+/* 2GB is the max file size that this implementation can safely handle. We guard
+against anything larger than that, assuming the worst case with 5 bytes per
+pixel, rounded down to a nice clean value. 400 million pixels ought to be
+enough for anybody. */
+#define QOI_PIXELS_MAX ((unsigned int)400000000)
+
+typedef union {
+	struct { unsigned char r, g, b, a; } rgba;
+	unsigned int v; /* whole pixel as one value; used for == and index lookups */
+} qoi_rgba_t;
+/* End-of-stream marker appended by the encoder: seven 0x00 bytes, then 0x01. */
+static const unsigned char qoi_padding[8] = {0,0,0,0,0,0,0,1};
+
+static void qoi_write_32(unsigned char *bytes, int *p, unsigned int v) {
+	unsigned char *out = bytes + *p; /* big-endian: most significant byte first */
+	int shift;
+	for (shift = 24; shift >= 0; shift -= 8) { *out++ = (v >> shift) & 0xff; }
+	*p += 4; /* advance the caller's cursor past the four bytes just written */
+}
+
+static unsigned int qoi_read_32(const unsigned char *bytes, int *p) {
+	/* Assemble a big-endian 32-bit value, advancing *p by one per byte read. */
+	unsigned int value = 0;
+	int remaining;
+	for (remaining = 4; remaining > 0; remaining--) { value = (value << 8) | bytes[(*p)++]; }
+	return value;
+}
+
+void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) {
+	int i, max_size, p, run;
+	int px_len, px_end, px_pos, channels;
+	unsigned char *bytes;
+	const unsigned char *pixels;
+	qoi_rgba_t index[64];
+	qoi_rgba_t px, px_prev;
+	/* Returns a QOI_MALLOC'd buffer (length in *out_len), or NULL on bad input. */
+	if (
+		data == NULL || out_len == NULL || desc == NULL ||
+		desc->width == 0 || desc->height == 0 ||
+		desc->channels < 3 || desc->channels > 4 ||
+		desc->colorspace > 1 ||
+		desc->height >= QOI_PIXELS_MAX / desc->width
+	) {
+		return NULL;
+	}
+	/* Worst case: every pixel costs one tag byte plus all of its channels. */
+	max_size =
+		desc->width * desc->height * (desc->channels + 1) +
+		QOI_HEADER_SIZE + sizeof(qoi_padding);
+
+	p = 0;
+	bytes = (unsigned char *) QOI_MALLOC(max_size);
+	if (!bytes) {
+		return NULL;
+	}
+	/* Header: magic, width and height (big-endian), channels, colorspace. */
+	qoi_write_32(bytes, &p, QOI_MAGIC);
+	qoi_write_32(bytes, &p, desc->width);
+	qoi_write_32(bytes, &p, desc->height);
+	bytes[p++] = desc->channels;
+	bytes[p++] = desc->colorspace;
+
+
+	pixels = (const unsigned char *)data;
+
+	QOI_ZEROARR(index);
+	/* The stream starts from an implicit previous pixel of opaque black. */
+	run = 0;
+	px_prev.rgba.r = 0;
+	px_prev.rgba.g = 0;
+	px_prev.rgba.b = 0;
+	px_prev.rgba.a = 255;
+	px = px_prev;
+
+	px_len = desc->width * desc->height * desc->channels;
+	px_end = px_len - desc->channels;
+	channels = desc->channels;
+
+	for (px_pos = 0; px_pos < px_len; px_pos += channels) {
+		/* Read channel by channel: `data` need not be aligned for qoi_rgba_t, */
+		/* so it must not be loaded through a cast pointer. */
+		px.rgba.r = pixels[px_pos + 0];
+		px.rgba.g = pixels[px_pos + 1];
+		px.rgba.b = pixels[px_pos + 2];
+		if (channels == 4) {
+			px.rgba.a = pixels[px_pos + 3];
+		}
+
+		if (px.v == px_prev.v) {
+			run++;
+			if (run == 62 || px_pos == px_end) {
+				bytes[p++] = QOI_OP_RUN | (run - 1);
+				run = 0;
+			}
+		}
+		else {
+			int index_pos;
+			/* A pending run has to be flushed before any other chunk. */
+			if (run > 0) {
+				bytes[p++] = QOI_OP_RUN | (run - 1);
+				run = 0;
+			}
+
+			index_pos = QOI_COLOR_HASH(px) % 64;
+
+			if (index[index_pos].v == px.v) {
+				bytes[p++] = QOI_OP_INDEX | index_pos;
+			}
+			else {
+				index[index_pos] = px;
+				/* DIFF, LUMA and RGB all keep alpha; a new alpha needs RGBA. */
+				if (px.rgba.a == px_prev.rgba.a) {
+					signed char vr = px.rgba.r - px_prev.rgba.r;
+					signed char vg = px.rgba.g - px_prev.rgba.g;
+					signed char vb = px.rgba.b - px_prev.rgba.b;
+
+					signed char vg_r = vr - vg;
+					signed char vg_b = vb - vg;
+
+					if (
+						vr > -3 && vr < 2 &&
+						vg > -3 && vg < 2 &&
+						vb > -3 && vb < 2
+					) {
+						bytes[p++] = QOI_OP_DIFF | (vr + 2) << 4 | (vg + 2) << 2 | (vb + 2);
+					}
+					else if (
+						vg_r >  -9 && vg_r <  8 &&
+						vg   > -33 && vg   < 32 &&
+						vg_b >  -9 && vg_b <  8
+					) {
+						bytes[p++] = QOI_OP_LUMA     | (vg   + 32);
+						bytes[p++] = (vg_r + 8) << 4 | (vg_b +  8);
+					}
+					else {
+						bytes[p++] = QOI_OP_RGB;
+						bytes[p++] = px.rgba.r;
+						bytes[p++] = px.rgba.g;
+						bytes[p++] = px.rgba.b;
+					}
+				}
+				else {
+					bytes[p++] = QOI_OP_RGBA;
+					bytes[p++] = px.rgba.r;
+					bytes[p++] = px.rgba.g;
+					bytes[p++] = px.rgba.b;
+					bytes[p++] = px.rgba.a;
+				}
+			}
+		}
+		px_prev = px;
+	}
+	/* End marker: the eight bytes of qoi_padding. */
+	for (i = 0; i < (int)sizeof(qoi_padding); i++) {
+		bytes[p++] = qoi_padding[i];
+	}
+
+	*out_len = p;
+	return bytes;
+}
+
+void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) {
+	const unsigned char *bytes;
+	unsigned int header_magic;
+	unsigned char *pixels;
+	qoi_rgba_t index[64];
+	qoi_rgba_t px;
+	int px_len, chunks_len, px_pos;
+	int p = 0, run = 0;
+	/* Returns QOI_MALLOC'd pixels, or NULL on bad arguments or a bad header. */
+	if (
+		data == NULL || desc == NULL ||
+		(channels != 0 && channels != 3 && channels != 4) ||
+		size < QOI_HEADER_SIZE + (int)sizeof(qoi_padding)
+	) {
+		return NULL;
+	}
+
+	bytes = (const unsigned char *)data;
+	/* desc is filled from the header before the header is validated. */
+	header_magic = qoi_read_32(bytes, &p);
+	desc->width = qoi_read_32(bytes, &p);
+	desc->height = qoi_read_32(bytes, &p);
+	desc->channels = bytes[p++];
+	desc->colorspace = bytes[p++];
+
+	if (
+		desc->width == 0 || desc->height == 0 ||
+		desc->channels < 3 || desc->channels > 4 ||
+		desc->colorspace > 1 ||
+		header_magic != QOI_MAGIC ||
+		desc->height >= QOI_PIXELS_MAX / desc->width
+	) {
+		return NULL;
+	}
+	/* channels == 0 means "use whatever the file header says". */
+	if (channels == 0) {
+		channels = desc->channels;
+	}
+
+	px_len = desc->width * desc->height * channels;
+	pixels = (unsigned char *) QOI_MALLOC(px_len);
+	if (!pixels) {
+		return NULL;
+	}
+	/* Decoding starts from an implicit previous pixel of opaque black. */
+	QOI_ZEROARR(index);
+	px.rgba.r = 0;
+	px.rgba.g = 0;
+	px.rgba.b = 0;
+	px.rgba.a = 255;
+	/* Chunks end where the 8-byte end marker starts. */
+	chunks_len = size - (int)sizeof(qoi_padding);
+	for (px_pos = 0; px_pos < px_len; px_pos += channels) {
+		if (run > 0) {
+			run--;
+		}
+		else if (p < chunks_len) {
+			int b1 = bytes[p++];
+			/* The 8-bit tags are tested first; they overlap the RUN tag. */
+			if (b1 == QOI_OP_RGB) {
+				px.rgba.r = bytes[p++];
+				px.rgba.g = bytes[p++];
+				px.rgba.b = bytes[p++];
+			}
+			else if (b1 == QOI_OP_RGBA) {
+				px.rgba.r = bytes[p++];
+				px.rgba.g = bytes[p++];
+				px.rgba.b = bytes[p++];
+				px.rgba.a = bytes[p++];
+			}
+			else if ((b1 & QOI_MASK_2) == QOI_OP_INDEX) {
+				px = index[b1];
+			}
+			else if ((b1 & QOI_MASK_2) == QOI_OP_DIFF) {
+				px.rgba.r += ((b1 >> 4) & 0x03) - 2;
+				px.rgba.g += ((b1 >> 2) & 0x03) - 2;
+				px.rgba.b += ( b1       & 0x03) - 2;
+			}
+			else if ((b1 & QOI_MASK_2) == QOI_OP_LUMA) {
+				int b2 = bytes[p++];
+				int vg = (b1 & 0x3f) - 32;
+				px.rgba.r += vg - 8 + ((b2 >> 4) & 0x0f);
+				px.rgba.g += vg;
+				px.rgba.b += vg - 8 +  (b2       & 0x0f);
+			}
+			else if ((b1 & QOI_MASK_2) == QOI_OP_RUN) {
+				run = (b1 & 0x3f);
+			}
+
+			index[QOI_COLOR_HASH(px) % 64] = px;
+		}
+		/* If the chunks run out early, the last pixel is repeated. Store byte */
+		/* by byte so a custom QOI_MALLOC need not return int-aligned memory. */
+		pixels[px_pos + 0] = px.rgba.r;
+		pixels[px_pos + 1] = px.rgba.g;
+		pixels[px_pos + 2] = px.rgba.b;
+		if (channels == 4) {
+			pixels[px_pos + 3] = px.rgba.a;
+		}
+
+	}
+
+	return pixels;
+}
+
+#ifndef QOI_NO_STDIO
+#include <stdio.h>
+
+int qoi_write(const char *filename, const void *data, const qoi_desc *desc) {
+	FILE *f = fopen(filename, "wb");
+	int size, err;
+	void *encoded;
+	/* Returns the encoded size in bytes, or 0 if any step failed. */
+	if (!f) {
+		return 0;
+	}
+	/* NOTE: the file is opened (and truncated) before encoding is attempted. */
+	encoded = qoi_encode(data, desc, &size);
+	if (!encoded) {
+		fclose(f);
+		return 0;
+	}
+	/* A short write and a failed flush-on-close both count as failure. */
+	err = fwrite(encoded, 1, (size_t)size, f) != (size_t)size;
+	err |= fclose(f) != 0;
+
+	QOI_FREE(encoded);
+	return err ? 0 : size;
+}
+
+void *qoi_read(const char *filename, qoi_desc *desc, int channels) {
+	FILE *f = fopen(filename, "rb");
+	long size; /* ftell() returns long; storing it in an int could truncate */
+	size_t bytes_read;
+	void *pixels, *data;
+	if (!f) {
+		return NULL;
+	}
+	/* Measure the file by seeking to its end; -1 marks any failure. */
+	size = (fseek(f, 0, SEEK_END) == 0) ? ftell(f) : -1L;
+	/* qoi_decode() takes an int length: reject sizes above INT_MAX (~0u >> 1). */
+	if (size <= 0 || size > (long)(~0u >> 1) || fseek(f, 0, SEEK_SET) != 0) {
+		fclose(f);
+		return NULL;
+	}
+
+	data = QOI_MALLOC((size_t)size);
+	if (!data) {
+		fclose(f);
+		return NULL;
+	}
+
+	bytes_read = fread(data, 1, (size_t)size, f);
+	fclose(f);
+	/* A short read means a truncated or unreadable file: fail, don't decode. */
+	pixels = (bytes_read == (size_t)size)
+		? qoi_decode(data, (int)size, desc, channels) : NULL;
+	QOI_FREE(data);
+	return pixels;
+}
+
+#endif /* QOI_NO_STDIO */
+#endif /* QOI_IMPLEMENTATION */
diff --git a/include/extern/roguelike.h b/include/extern/roguelike.h
new file mode 100644
index 0000000..97fa70c
--- /dev/null
+++ b/include/extern/roguelike.h
@@ -0,0 +1,2752 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2024 Michael H. Mackus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *
+ * roguelike.h
+ *
+ *
+ * A single header library for tile-based games. Most features have a
+ * "rl_*_create" and a "rl_*_destroy" function. The create function allocates
+ * memory and returns a pointer that is assumed to be freed with rl_*_destroy.
+ * You can avoid using malloc & free by defining RL_MALLOC (and optionally
+ * RL_CALLOC and RL_REALLOC) and RL_FREE, or simply manage the memory yourself.
+ *
+ * Make sure to define RL_IMPLEMENTATION once and only once before including
+ * "roguelike.h" to compile the library.
+ *
+ * The primary feature of this library deals with the tile-based maps and map
+ * generation. The functions are prefixed with rl_map and rl_mapgen.
+ *
+ * To generate a map, create the map via rl_map_create then call the function
+ * with the algorithm you wish to use for mapgen. For example:
+ *
+ *   RL_Map *map = rl_map_create(80, 25);
+ *   if (rl_mapgen_bsp(map, RL_MAPGEN_BSP_DEFAULTS) != RL_OK) {
+ *     printf("Error occurred during mapgen!\n");
+ *   }
+ *   ....
+ *   rl_map_destroy(map); // frees the map pointer & internal data
+ *
+ * The rl_bsp methods correspond to a BSP graph containing data for rectangles.
+ * Note that the rl_bsp_split function does allocate memory for the split and
+ * assigns the new left & right nodes to the BSP tree (this data is freed with
+ * rl_bsp_destroy).
+ *
+ * There is also functionality for a simple min heap (or priority queue).
+ * These functions are prefixed with rl_heap. To use these you need to create
+ * a heap with rl_heap_create then insert items with rl_heap_insert. The heap
+ * does not free or allocate memory for items you insert into the heap.
+ *
+ *  RL_Heap *q = rl_heap_create(1, NULL); // NULL comparison function acts as dynamic array
+ *  int val = 5;
+ *  rl_heap_insert(q, &val);
+ *  ....
+ *  int *r;
+ *  while ((r = (int*) rl_heap_pop(q))) { ... }
+ *  rl_heap_destroy(q);
+ *
+ * There is also a set of FOV functions - these functions use a simple
+ * shadowcasting algorithm to implement FOV. Create the RL_FOV struct with
+ * rl_fov_create (making sure to free it with rl_fov_destroy), and each time
+ * you want to "update" the FOV you should call "rl_fov_calculate" or
+ * "rl_fov_calculate_ex".
+ *
+ *  RL_FOV *fov = rl_fov_create(80, 25);
+ *  for (;;) { // gameloop
+ *      rl_fov_calculate(fov, map, player_x, player_y, 8); // last arg is FOV radius
+ *      ... // draw map, handle input, etc.
+ *  }
+ *  rl_fov_destroy(fov);
+ *
+ * There is also a set of pathfinding functions - these functions primarily
+ * create and manage Dijkstra graphs for pathfinding. These functions are
+ * prefixed with rl_path, rl_dijkstra, and rl_graph. Paths should be "walked"
+ * with "rl_path_walk" which frees each part of the path passed, returning the
+ * next part of the path; or you can alternatively call "rl_path_destroy".
+ *
+ *  RL_Path *path = rl_path_create(map, RL_XY(0,0), RL_XY(20,20), rl_distance_euclidian);
+ *  while ((path = rl_path_walk(path))) { ...  } // frees the path
+ *
+ * Dijkstra graphs can be created & scored with rl_dijkstra_create or manually
+ * scored via rl_dijkstra_score* functions. After the graph is scored the
+ * graph can be walked by finding a "start" node in the graph, and
+ * recursively walking the graph by choosing the lowest scored neighbor. If a
+ * RL_GraphNode has a score of FLT_MAX it has not been scored.
+ *
+ *  // Typically you provide destination for the initial Dijkstra graph
+ *  RL_Graph *graph = rl_dijkstra_create(map, dest, rl_distance_manhattan);
+ *  // Then find start point in graph
+ *  RL_GraphNode *node = rl_graph_node(graph, start);
+ *  // Then, you "roll downhill" from the start point
+ *  if (node != NULL) {
+ *    RL_GraphNode *lowest_neighbor = rl_graph_node_lowest_neighbor(node);
+ *    // The next point in the path is lowest_neighbor->point
+ *    if (lowest_neighbor) move_player(lowest_neighbor->point);
+ *    ...
+ *  }
+ *  rl_graph_destroy(graph);
+ *
+ *
+ * Preprocessor definitions (define these before including roguelike.h to customize internals):
+ *
+ *
+ *  RL_IMPLEMENTATION                 Define this to compile the library - should only be defined once in one file
+ *  RL_MAX_NEIGHBOR_COUNT             Maximum neighbor count for Dijkstra graphs (defaults to 8). Note this is needed in the function definitions - if you override this, you'll have to define it everywhere you include roguelike.h (just make a wrapper).
+ *  RL_FOV_SYMMETRIC                  Set this to 0 to disable symmetric FOV (defaults to 1)
+ *  RL_MAX_RECURSION                  Maximum recursion (defaults to 100). This is used in FOV to limit recursion when fov_radius is large or -1 (unlimited).
+ *  RL_MAPGEN_BSP_RANDOMISE_ROOM_LOC  Set this to 0 to disable randomizing room locations within bsp (used in rl_mapgen_bsp - defaults to 1)
+ *  RL_ENABLE_PATHFINDING             Set this to 0 to disable pathfinding functionality (defaults to 1)
+ *  RL_ENABLE_FOV                     Set this to 0 to disable field of view functionality (defaults to 1)
+ *  RL_ENABLE_FILE                    Set this to 0 to disable save & load helper functions.
+ *  RL_IS_PASSABLE                    Passable tile logic. Macro function - first argument to the macro is the tile, second is x, third is y.
+ *  RL_IS_OPAQUE                      Opaque tile logic. Macro function - first argument to the macro is the tile, second is x, third is y.
+ *  RL_IS_WALL_TILE                   Wall tile logic, for checking if this tile can connect to other walls. Macro function - first argument to the macro is the tile, second is x, third is y.
+ *  RL_PASSABLE_F                     Set this to your default passable function (defaults to rl_map_is_passable).
+ *  RL_OPAQUE_F                       Set this to your default opaque function (defaults to rl_map_is_opaque).
+ *  RL_WALL_F                         Set this to your default is_wall function (defaults to rl_map_is_wall).
+ *  RL_FOV_DISTANCE_F                 Set this to your default FOV distance function (defaults to rl_distance_euclidian).
+ *  RL_RNG_F                          Set this to your default RNG generation function (defaults to rl_rng_generate).
+ *  RL_ASSERT                         Define this to override the assert function used by the library (defaults to "assert")
+ *  RL_MALLOC                         Define this to override the malloc function used by the library (defaults to "malloc")
+ *  RL_CALLOC                         Define this to override the calloc function used by the library (defaults to "calloc")
+ *  RL_REALLOC                        Define this to override the realloc function used by the library, used in rl_heap_* (defaults to "realloc")
+ *  RL_FREE                           Define this to override the free function used by the library (defaults to "free")
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef RL_ROGUELIKE_H
+#define RL_ROGUELIKE_H
+
+#include <stddef.h>
+
+/* Prefix for brace initializers, needed since MSVC & c89 don't support compound literals: only newer C gets the cast-like "(type)" */
+#ifndef RL_CLITERAL
+#if _MSVC_LANG || __cplusplus
+#define RL_CLITERAL(type) type /* C++ / MSVC: expands to "type { ... }" */
+#elif __STDC_VERSION__ < 199409L /* NOTE(review): compound literals arrived with C99 (199901L), 199409L is C95 - confirm threshold */
+#define RL_CLITERAL(type) /* expands to nothing, leaving a plain "{ ... }" initializer */
+#else
+#define RL_CLITERAL(type) (type) /* compound literal "(type) { ... }" */
+#endif
+#endif
+
+/* Bool type in c89: fall back to an int typedef plus true/false macros, otherwise use <stdbool.h> */
+#if __STDC_VERSION__ < 199409L && !__cplusplus /* NOTE(review): <stdbool.h> is a C99 header (199901L), 199409L is C95 - confirm threshold */
+typedef int bool;
+#define true 1
+#define false 0
+#else
+#include <stdbool.h>
+#endif
+
+/**
+ * Generic structs for library.
+ */
+
+/* each tile is the size of 1 byte, so it can be cast back & forth from char <-> RL_Tile */
+typedef unsigned char RL_Byte;
+
+/* Generic dungeon map structure, supporting hex & square 2d maps, along with the associated tile enum (tile values are printable characters). */
+typedef enum {
+    RL_TileRock = ' ', /* the only tile that is not passable under the default RL_IS_PASSABLE; rl_map_create fills new maps with it */
+    RL_TileRoom = '.', /* passable by default */
+    RL_TileCorridor = '#', /* passable by default */
+    RL_TileDoor = '+', /* passable, but opaque under the default RL_IS_OPAQUE */
+    RL_TileDoorOpen = '=' /* passable by default */
+} RL_Tile;
+typedef struct RL_Map {
+    unsigned int width; /* number of tiles per row */
+    unsigned int height; /* number of rows */
+    RL_Byte *tiles; /* a sequential array of RL_Tiles, stride for each row equals the map width (index = x + y*width). */
+} RL_Map;
+
+/* BSP tree node - each node carries the data of one rectangle (see the rl_bsp_* functions) */
+typedef struct RL_BSP {
+    unsigned int width; /* rectangle width */
+    unsigned int height; /* rectangle height */
+    unsigned int x; /* rectangle position - presumably the top-left corner, TODO confirm */
+    unsigned int y;
+    struct RL_BSP *parent; /* rl_bsp_sibling documents the root node as having no parent */
+    struct RL_BSP *left;  /* left child - must be NULL before rl_bsp_split is called on this node */
+    struct RL_BSP *right; /* right child - must be NULL before rl_bsp_split is called on this node */
+} RL_BSP;
+
+/* BSP split direction */
+typedef enum {
+    RL_SplitHorizontally, /* split the BSP node on the x axis (splits width) */
+    RL_SplitVertically   /* split the BSP node on the y axis (splits height) */
+} RL_SplitDirection;
+
+/**
+ * Random map generation
+ */
+
+/* Creates an empty map. Make sure to call rl_map_destroy to clear memory. */
+RL_Map *rl_map_create(unsigned int width, unsigned int height);
+
+/* Frees the map & internal memory. */
+void rl_map_destroy(RL_Map *map);
+
+/* Enum representing the type of corridor connection algorithm. RL_ConnectRandomly is the default and results in the
+ * most interesting & aesthetic maps. */
+typedef enum {
+    RL_ConnectNone = 0,       /* don't connect corridors */
+    RL_ConnectRandomly,       /* connect corridors to random leaf nodes (requires RL_ENABLE_PATHFINDING, by default this is on) */
+    RL_ConnectBSP,            /* connect corridors by traversing the BSP graph (faster than above but less circular/interesting maps, requires RL_ENABLE_PATHFINDING) */
+    RL_ConnectSimple          /* connect corridors by traversing the BSP graph without Dijkstra pathfinding (fastest) */
+} RL_MapgenCorridorConnection;
+
+/* The config for BSP map generation - note that the dimensions *include* the walls on both sides, so the min room width
+ * & height the library accepts is 3. Values in parentheses below are the ones used by RL_MAPGEN_BSP_DEFAULTS. */
+typedef struct {
+    unsigned int room_min_width; /* (4) */
+    unsigned int room_max_width; /* (6) */
+    unsigned int room_min_height; /* (4) */
+    unsigned int room_max_height; /* (6) */
+    unsigned int room_padding; /* (1) */
+    RL_MapgenCorridorConnection draw_corridors; /* type of corridor connection algorithm to use (RL_ConnectRandomly) */
+    bool draw_doors; /* whether to draw doors while connecting corridors (true) */
+    int max_splits; /* max times to split BSP - set lower for fewer rooms (100) */
+} RL_MapgenConfigBSP;
+
+/* Provide some defaults for mapgen. */
+#define RL_MAPGEN_BSP_DEFAULTS RL_CLITERAL(RL_MapgenConfigBSP) { \
+    /*.room_min_width =*/      4, \
+    /*.room_max_width =*/      6, \
+    /*.room_min_height =*/     4, \
+    /*.room_max_height =*/     6, \
+    /*.room_padding =*/        1, \
+    /*.draw_corridors =*/      RL_ConnectRandomly, \
+    /*.draw_doors =*/          true, \
+    /*.max_splits =*/          100 \
+}
+
+typedef enum {
+    RL_OK = 0,
+    RL_ErrorMemory,
+    RL_ErrorNullParameter,
+    RL_ErrorMapgenInvalidConfig
+} RL_Status;
+
+/* Generate map with recursive BSP split algorithm. This fills the map tiles with RL_TileRock before generation. */
+RL_Status rl_mapgen_bsp(RL_Map *map, RL_MapgenConfigBSP config);
+
+/* Generates map with recursive BSP split algorithm. This splits the BSP pointer passed, and uses the BSP to constrain
+ * the dimensions of the map generation.
+ *
+ * This allocates memory for the BSP children - make sure to use rl_bsp_destroy or free them yourself. Note that this
+ * does not set the tiles to RL_TileRock before generation. This way you can have separate regions of the map with
+ * different mapgen algorithms. */
+RL_Status rl_mapgen_bsp_ex(RL_Map *map, RL_BSP *bsp, const RL_MapgenConfigBSP *config);
+
+/* The config for cellular automata map generation, passed to rl_mapgen_automata & rl_mapgen_automata_ex.
+ * RL_MAPGEN_AUTOMATA_DEFAULTS provides a ready-made set of values. */
+typedef struct {
+    unsigned int chance_cell_initialized; /* chance (from 1-100) a cell is initialized with rock */
+    unsigned int birth_threshold;         /* threshold of neighbors for a cell to be born */
+    unsigned int survival_threshold;      /* threshold of neighbors for a cell to die from overpopulation */
+    unsigned int max_iterations;          /* recursion limit */
+    bool draw_corridors;                  /* after generation, whether to randomly draw corridors to unconnected space
+                                           * note - you still need cull_unconnected if you want a fully connected map
+                                           *
+                                           * requires RL_ENABLE_PATHFINDING */
+    bool cull_unconnected;                /* after generation, whether to remove unconnected space from the larger map - requires RL_ENABLE_PATHFINDING */
+    bool fill_border;                     /* after generation, whether to fill the border with rock to ensure an enclosed map */
+} RL_MapgenConfigAutomata;
+
+/* Provide some defaults for automata mapgen. */
+#define RL_MAPGEN_AUTOMATA_DEFAULTS RL_CLITERAL(RL_MapgenConfigAutomata) { \
+    /*.chance_cell_initialized =*/  45, \
+    /*.birth_threshold =*/          5, \
+    /*.survival_threshold =*/       4, \
+    /*.max_iterations =*/           3, \
+    /*.draw_corridors = */          true, \
+    /*.cull_unconnected =*/         true, \
+    /*.fill_border =*/              true \
+}
+
+/* Generate map with cellular automata. This clears out the previous tiles before generation. */
+RL_Status rl_mapgen_automata(RL_Map *map, RL_MapgenConfigAutomata config);
+
+/* Same as above function, but constrains generation according to passed dimensions. */
+RL_Status rl_mapgen_automata_ex(RL_Map *map, unsigned int x, unsigned int y, unsigned int width, unsigned int height,  const RL_MapgenConfigAutomata *config);
+
+/* Generate map with a random maze (via simplistic BFS). Tiles are carved with RL_TileCorridor. Fully connected. */
+RL_Status rl_mapgen_maze(RL_Map *map);
+
+/* Same as above function, but takes a region (x, y, width, height) - presumably constraining generation to it like rl_mapgen_automata_ex. */
+RL_Status rl_mapgen_maze_ex(RL_Map *map, unsigned int x, unsigned int y, unsigned int width, unsigned int height);
+
+/* Connect map via corridors using the supplied BSP graph. */
+RL_Status rl_mapgen_connect_corridors(RL_Map *map, RL_BSP *root, bool draw_doors, RL_MapgenCorridorConnection connection_algorithm);
+
+/**
+ * Generic map helper functions.
+ */
+
+/* Verifies a coordinate is within bounds of map. */
+bool rl_map_in_bounds(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* Checks if a tile is passable. */
+bool rl_map_is_passable(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* Checks if a tile is opaque (for FOV calculations). */
+bool rl_map_is_opaque(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* Get tile at point */
+RL_Byte *rl_map_tile(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* Returns 1 if tile at point matches given parameter. */
+bool rl_map_tile_is(const RL_Map *map, unsigned int x, unsigned int y, RL_Byte tile);
+
+/* Type of wall on the map - idea is they can be bitmasked together (e.g. for corners). See rl_map_wall and other
+ * related functions. */
+typedef enum {
+    RL_WallToWest  = 1,
+    RL_WallToEast  = 1 << 1,
+    RL_WallToNorth = 1 << 2,
+    RL_WallToSouth = 1 << 3,
+    RL_WallOther   = 1 << 7 /* e.g. a wall that has no connecting walls */
+} RL_Wall;
+
+/* A tile is considered a wall if it is touching a passable tile.
+ *
+ * Returns a bitmask of the RL_Wall enum. For example, a wall with a wall tile to the south, west, and east would have a
+ * bitmask of 0b1011. */
+RL_Byte rl_map_wall(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* Is the tile a wall tile? */
+bool rl_map_is_wall(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* Is the wall a corner? */
+bool rl_map_is_corner_wall(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* Is this a wall that is touching a room tile? */
+bool rl_map_is_room_wall(const RL_Map *map, unsigned int x, unsigned int y);
+
+/* A wall that is touching a room tile (e.g. to display it lit). */
+RL_Byte rl_map_room_wall(const RL_Map *map, unsigned int x, unsigned int y);
+
+/**
+ * Simple priority queue implementation
+ */
+
+typedef struct { /* min heap / priority queue of caller-owned item pointers - the heap does not allocate or free the items */
+    void **heap; /* array of item pointers */
+    int cap; /* presumably the number of allocated slots (rl_heap_create takes the initial capacity) - TODO confirm */
+    int len; /* presumably the number of stored items (see rl_heap_length) - TODO confirm */
+    int (*comparison_f)(const void *heap_item_a, const void *heap_item_b); /* returns 1 if heap_item_a should be popped first; may be NULL (order is then undefined) */
+} RL_Heap;
+
+/* Allocates memory for the heap. Make sure to call rl_heap_destroy after you are done.
+ *
+ * capacity - initial capacity for the heap
+ * comparison_f - A comparison function that returns 1 if heap_item_a should be
+ *  popped from the queue before heap_item_b. If NULL the heap will still work
+ *  but order will be undefined. */
+RL_Heap *rl_heap_create(int capacity, int (*comparison_f)(const void *heap_item_a, const void *heap_item_b));
+
+/* Frees the heap & internal memory. */
+void rl_heap_destroy(RL_Heap *h);
+
+/* Return the length of the heap items */
+int rl_heap_length(const RL_Heap *h);
+
+/* Insert item into the heap. This will resize the heap if necessary. */
+bool rl_heap_insert(RL_Heap *h, void *item);
+
+/* Returns & removes an item from the queue. */
+void *rl_heap_pop(RL_Heap *h);
+
+/* Peek at the first item in the queue. This does not remove the item from the queue. */
+void *rl_heap_peek(RL_Heap *h);
+
+/**
+ * BSP Manipulation
+ */
+
+/* Params width & height must be positive. Make sure to free with rl_bsp_destroy. */
+RL_BSP *rl_bsp_create(unsigned int width, unsigned int height);
+
+/* Frees the BSP root & all children */
+void rl_bsp_destroy(RL_BSP *root);
+
+/* Split the BSP by direction - this creates the left & right leaf and */
+/* populates them in the BSP node. Position must be positive and within */
+/* the BSP root node. Also node->left & node->right must be NULL */
+void rl_bsp_split(RL_BSP *node, unsigned int position, RL_SplitDirection direction);
+
+/* Recursively split the BSP. Used for map generation. */
+/* */
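+/* Returns an RL_Status code. NOTE(review): this line used to promise "true if the BSP was able to split at least once", which does not match the return type - confirm against the implementation. */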
+RL_Status rl_bsp_recursive_split(RL_BSP *root, unsigned int min_width, unsigned int min_height, unsigned int max_recursion);
+
+/* Returns 1 if the node is a leaf node. */
+bool rl_bsp_is_leaf(const RL_BSP *node);
+
+/* Return sibling node. Returns NULL if there is no parent (i.e. for the root */
+/* node). */
+RL_BSP *rl_bsp_sibling(const RL_BSP *node);
+
+/* Returns amount of leaves in tree. */
+size_t rl_bsp_leaf_count(const RL_BSP *root);
+
+/* Return the next leaf node to the right if it exists. */
+RL_BSP *rl_bsp_next_leaf(const RL_BSP *node);
+
+/* Returns a random leaf node beneath root */
+RL_BSP* rl_bsp_random_leaf(const RL_BSP *root);
+
+/**
+ * Pathfinding - disable with #define RL_ENABLE_PATHFINDING 0
+ */
+
+/* A point on the map used for pathfinding. The points are a float type for flexibility since pathfinding works for maps */
+/* of all data types. */
+typedef struct RL_Point {
+    float x, y;
+} RL_Point;
+
+/* Macro to easily create a RL_Point (compound literals only available in C99, which MSVC doesn't support). */
+#define RL_XY(x, y) RL_CLITERAL(RL_Point) { (float)(x), (float)(y) }
+
+/* Max neighbors for a pathfinding node. */
+#ifndef RL_MAX_NEIGHBOR_COUNT
+#define RL_MAX_NEIGHBOR_COUNT 8
+#endif
+
+/* Represents a graph of pathfinding nodes that has been scored for pathfinding (e.g. with the Dijkstra algorithm). */
+/* TODO store weights on graph nodes ? */
+typedef struct RL_GraphNode {
+    float score; /* will be FLT_MAX for an unreachable/unscored node in the Dijkstra algorithm */
+    RL_Point point;
+    size_t neighbors_length;
+    struct RL_GraphNode *neighbors[RL_MAX_NEIGHBOR_COUNT];
+} RL_GraphNode;
+typedef struct RL_Graph {
+    size_t length; /* length of nodes */
+    RL_GraphNode *nodes; /* array of nodes - length will be the size of the map.width * map.height */
+} RL_Graph;
+
+/* A path is a linked list of paths. You can "walk" a path using rl_path_walk which will simultaneously free the
+ * previous path. */
+typedef struct RL_Path {
+    RL_Point point;
+    struct RL_Path *next;
+} RL_Path;
+
+/* Useful distance functions for pathfinding. */
+float rl_distance_manhattan(RL_Point node, RL_Point end);
+float rl_distance_euclidian(RL_Point node, RL_Point end);
+float rl_distance_chebyshev(RL_Point node, RL_Point end);
+
+/* Custom distance function for pathfinding - calculates distance between map nodes */
+typedef float (*RL_DistanceFun)(RL_Point from, RL_Point to);
+
+/* Custom passable function for pathfinding. Return 0 to prevent neighbor from being included in graph. */
+typedef bool (*RL_PassableFun)(void *context, unsigned int x, unsigned int y);
+
+/* Custom score function for pathfinding - most users won't need this, but it gives flexibility in weighting the
+ * Dijkstra graph. Note that Dijkstra expects you to add the current node's score to the newly calculated score. */
+typedef float (*RL_ScoreFun)(const RL_GraphNode *current, const RL_GraphNode *neighbor, void *context);
+
+/* Generates a line starting at from ending at to. Each path in the line will be incremented by step. */
+RL_Path *rl_line_create(RL_Point from, RL_Point to, float step);
+
+/* Find a path between start and end via Dijkstra algorithm. Make sure to call rl_path_destroy when done with path.
+ * Pass NULL to distance_f to use rough approximation for euclidian. */
+RL_Path *rl_path_create(const RL_Map *map, RL_Point start, RL_Point end, RL_DistanceFun distance_f);
+
+/* Find a path between start and end via the scored Dijkstra graph. Make sure to call rl_path_destroy when done with path (or
+ * use rl_path_walk). */
+RL_Path *rl_path_create_from_graph(const RL_Graph *graph, RL_Point start);
+
+/* Convenience function to "walk" the path. This will return the next path, freeing the current path. You do not need to
+ * call rl_path_destroy if you walk the full path. */
+RL_Path *rl_path_walk(RL_Path *path);
+
+/* Frees the path & all linked nodes. */
+void rl_path_destroy(RL_Path *path);
+
+/* Dijkstra pathfinding algorithm. Pass NULL to distance_f to use rough approximation for euclidian.
+ *
+ * You can use Dijkstra maps for pathfinding, simple AI, and much more. For example, by setting the player point to
+ * "start" then you can pick the highest scored tile in the map and set that as the new "start" point. As with all
+ * Dijkstra maps, you just walk the map by picking the lowest scored neighbor. This is a simplistic AI resembling a
+ * wounded NPC fleeing from the player.
+ *
+ * Make sure to destroy the resulting RL_Graph with rl_graph_destroy. */
+RL_Graph *rl_dijkstra_create(const RL_Map *map,
+                            RL_Point start,
+                            RL_DistanceFun distance_f);
+
+/* Dijkstra pathfinding algorithm. Uses RL_Graph so that your code doesn't need to rely on RL_Map. Each node's
+ * distance should equal FLT_MAX in the resulting graph if it is impassable. */
+void rl_dijkstra_score(RL_Graph *graph, RL_Point start, RL_DistanceFun distance_f);
+
+/* Dijkstra pathfinding algorithm for advanced use cases such as weighting certain tiles higher than others. Uses
+ * RL_Graph so that your code doesn't need to rely on RL_Map. Each node's distance should equal FLT_MAX in the resulting
+ * graph if it is impassable. Most users should just use rl_dijkstra_score - only use this if you have a specific need. */
+void rl_dijkstra_score_ex(RL_Graph *graph, RL_Point start, RL_ScoreFun score_f, void *score_context);
+
+/* Returns the largest connected area (of passable tiles) on the map. Make sure to destroy the graph with
+ * rl_graph_destroy after you are done. */
+RL_Graph *rl_graph_floodfill_largest_area(const RL_Map *map);
+
+/* Create an unscored graph based on the 2d map. Make sure to call rl_graph_destroy when finished. */
+RL_Graph *rl_graph_create(const RL_Map *map);
+
+/* Extended version of rl_graph_create: takes a custom passable function (called with context) and a flag for diagonal neighbors. Make sure to call rl_graph_destroy when finished. */
+RL_Graph *rl_graph_create_ex(const RL_Map *map, void *context, RL_PassableFun passable_f, bool allow_diagonal_neighbors);
+
+/* Reset scores of Dijkstra map */
+void rl_graph_reset(RL_Graph *graph);
+
+/* Add two graphs together, adding their scores */
+/* Note that this assumes the graph lengths are identical */
+void rl_graph_add(RL_Graph *graph, const RL_Graph *graph_b);
+
+/* Multiply the scores of a graph by a coefficient (e.g. to weight a graph) */
+void rl_graph_weight(RL_Graph *graph, float coefficient);
+
+/* Frees the graph & internal memory. */
+void rl_graph_destroy(RL_Graph *graph);
+
+/* Checks if coordinate is scored in graph (e.g. its score is less than FLT_MAX). */
+bool rl_graph_is_scored(const RL_Graph *graph, RL_Point point);
+
+/* Returns the node of a point within a graph if it exists. */
+RL_GraphNode *rl_graph_node(const RL_Graph *graph, RL_Point point);
+
+/* Returns the lowest scored neighbor within a graph if it exists - returns NULL if the lowest scored neighbor is scored
+ * with FLT_MAX (meaning it is unscored). */
+RL_GraphNode *rl_graph_node_lowest_neighbor(const RL_GraphNode *node);
+
+/* Sort node neighbors based on score - lowest score will be at node->neighbors[0] */
+void rl_graph_node_sort_neighbors(RL_GraphNode *node);
+
+/**
+ * FOV - disable with #define RL_ENABLE_FOV 0
+ */
+
+/* Structure containing information for the FOV algorithm, along with the associated visibility enum. */
+typedef enum {
+    RL_TileCannotSee = 0,
+    RL_TileVisible, /* queried with rl_fov_is_visible */
+    RL_TileSeen /* rl_fov_calculate sets previously visible tiles to this; queried with rl_fov_is_seen */
+} RL_TileVisibility;
+typedef struct {
+    unsigned int width;
+    unsigned int height;
+    RL_Byte *visibility; /* a sequential array of RL_TileVisibility values, stride for each row = the map width */
+} RL_FOV;
+
+/* Creates empty FOV and fills it with opaque tiles. Make sure to call rl_fov_destroy to clear memory. */
+RL_FOV *rl_fov_create(unsigned int width, unsigned int height);
+
+/* Frees the FOV & internal memory. */
+void rl_fov_destroy(RL_FOV *fov);
+
+/* Function to determine if a tile is within the range of the FOV. Returns true if point is in range. */
+typedef bool (*RL_IsInRangeFun)(unsigned int x, unsigned int y, void *context);
+/* Function to determine if a tile is considered Opaque for FOV calculation. Make sure you do bounds checking that the
+ * point is within your map. Returns true if point is considered "opaque" (i.e. unable to see through). */
+typedef bool (*RL_IsOpaqueFun)(unsigned int x, unsigned int y, void *context);
+/* Function to mark a tile as visible within the FOV. Make sure you do bounds checking that the point is within your map. */
+typedef void (*RL_MarkAsVisibleFun)(unsigned int x, unsigned int y, void *context);
+
+/* Calculate FOV using simple shadowcasting algorithm. Set fov_radius to a negative value to have unlimited FOV (note
+ * this is limited by RL_MAX_RECURSION).
+ *
+ * Note that this sets previously visible tiles to RL_TileSeen. */
+void rl_fov_calculate(RL_FOV *fov, const RL_Map *map, unsigned int x, unsigned int y, int fov_radius);
+
+/* Calculate FOV using simple shadowcasting algorithm. Set fov_radius to a negative value to have unlimited FOV (note
+ * this is limited by RL_MAX_RECURSION).
+ *
+ * Generic version of above function. */
+void rl_fov_calculate_ex(void *context, unsigned int x, unsigned int y, RL_IsInRangeFun in_range_f, RL_IsOpaqueFun opaque_f, RL_MarkAsVisibleFun mark_visible_f);
+
+/* Checks if a point is visible within FOV. Make sure to call rl_fov_calculate first. */
+bool rl_fov_is_visible(const RL_FOV *map, unsigned int x, unsigned int y);
+
+/* Checks if a point has been seen within FOV. Make sure to call rl_fov_calculate first. */
+bool rl_fov_is_seen(const RL_FOV *map, unsigned int x, unsigned int y);
+
+/**
+ * Random number generation
+ */
+
+/* Default implementation of RNG using standard library. */
+unsigned int rl_rng_generate(unsigned int min, unsigned int max);
+
+/**
+ * Saving & Loading helper functions - to use these make sure to open the file beforehand in binary mode.
+ *
+ * The file is a FILE pointer (void* so we don't have to depend on stdio).
+ */
+
+bool rl_file_save_map(const RL_Map *data, void *file);
+bool rl_file_load_map(RL_Map **data, void *file);
+bool rl_file_save_fov(const RL_FOV *data, void *file);
+bool rl_file_load_fov(RL_FOV **data, void *file);
+#endif /* RL_ROGUELIKE_H */
+
+#ifdef RL_IMPLEMENTATION
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+
+#ifndef RL_FOV_SYMMETRIC
+#define RL_FOV_SYMMETRIC 1
+#endif
+
+#ifndef RL_MAX_RECURSION
+#define RL_MAX_RECURSION 100
+#endif
+
+/* define this to 0 to put the rooms in the middle of the BSP leaf during dungeon generation */
+#ifndef RL_MAPGEN_BSP_RANDOMISE_ROOM_LOC
+#define RL_MAPGEN_BSP_RANDOMISE_ROOM_LOC 1
+#endif
+
+/* define to 0 to disable pathfinding */
+#ifndef RL_ENABLE_PATHFINDING
+#define RL_ENABLE_PATHFINDING 1
+#endif
+
+/* define to 0 to disable FOV */
+#ifndef RL_ENABLE_FOV
+#define RL_ENABLE_FOV 1
+#endif
+
+/* define to 0 to disable save & load */
+#ifndef RL_ENABLE_FILE
+#define RL_ENABLE_FILE 1
+#endif
+
+/* convenience macro for custom passable tile logic (for mapgen & pathfinding). t is the tile (parenthesized so expression arguments expand safely, but evaluated several times - avoid side effects); x & y are unused by this default */
+#ifndef RL_IS_PASSABLE
+#define RL_IS_PASSABLE(t, x, y) ((t) == RL_TileRoom || (t) == RL_TileCorridor || (t) == RL_TileDoor || (t) == RL_TileDoorOpen)
+#endif
+/* convenience macro for custom opaque tile logic (for FOV). By default closed doors and every impassable tile are opaque */
+#ifndef RL_IS_OPAQUE
+#define RL_IS_OPAQUE(t, x, y) ((t) == RL_TileDoor || !RL_IS_PASSABLE(t, x, y))
+#endif
+/* convenience macro for custom wall tile logic (for connections). By default impassable tiles and both door tiles count as wall tiles */
+#ifndef RL_IS_WALL_TILE
+#define RL_IS_WALL_TILE(t, x, y) (!RL_IS_PASSABLE(t,x,y) || (t) == RL_TileDoor || (t) == RL_TileDoorOpen)
+#endif
+
+#ifndef RL_PASSABLE_F
+#define RL_PASSABLE_F rl_map_is_passable
+#endif
+#ifndef RL_OPAQUE_F
+#define RL_OPAQUE_F rl_map_is_opaque
+#endif
+#ifndef RL_WALL_F
+#define RL_WALL_F rl_map_is_wall
+#endif
+#ifndef RL_FOV_DISTANCE_F
+#define RL_FOV_DISTANCE_F rl_distance_euclidian
+#endif
+#ifndef RL_RNG_F
+#define RL_RNG_F rl_rng_generate
+#endif
+#ifndef RL_ASSERT /* default assertion hook: forwards to assert() from <assert.h> */
+#include <assert.h>
+#define RL_ASSERT(expr)		(assert(expr)); /* NOTE(review): the expansion already ends in ';', so "RL_ASSERT(x);" adds an empty statement and breaks as an unbraced if/else body */
+#endif
+#ifndef RL_MALLOC
+#define RL_MALLOC malloc
+#endif
+#ifndef RL_CALLOC
+#define RL_CALLOC calloc
+#endif
+#ifndef RL_REALLOC
+#define RL_REALLOC realloc
+#endif
+#ifndef RL_FREE
+#define RL_FREE free
+#endif
+
+#define RL_UNUSED(x) (void)x
+
+#if RL_ENABLE_PATHFINDING
+#include <float.h>
+#include <math.h>
+#endif
+
+/* Creates a width x height map in a single allocation (RL_Map header followed by its tiles),
+ * every tile set to RL_TileRock. Returns NULL if the size would overflow or allocation fails. */
+RL_Map *rl_map_create(unsigned int width, unsigned int height)
+{
+    RL_Map *map;
+    unsigned char *memory;
+    size_t tile_count = (size_t) width * height; /* only used once the overflow check below has passed */
+    RL_ASSERT(width > 0 && height > 0);
+    /* bound by division: width*height wraps in unsigned math, so testing the product itself can never
+     * detect overflow. Subtracting the header size keeps the whole allocation within UINT_MAX too. */
+    if (height != 0 && width > (UINT_MAX - sizeof(*map)) / height) return NULL;
+    memory = (unsigned char*) RL_MALLOC(sizeof(*map) + sizeof(*map->tiles)*tile_count);
+    RL_ASSERT(memory);
+    if (memory == NULL) return NULL;
+    map = (RL_Map*) memory;
+    map->width = width;
+    map->height = height;
+    map->tiles = (RL_Byte*) (memory + sizeof(*map));
+    memset(map->tiles, RL_TileRock, sizeof(*map->tiles)*tile_count);
+    return map;
+}
+
+/* Release a map through RL_FREE. A NULL map is ignored. */
+void rl_map_destroy(RL_Map *map)
+{
+    if (map == NULL)
+        return;
+    RL_FREE(map);
+}
+
+/* True when (x, y) lies inside the map's width x height grid. `map` must not be NULL. */
+bool rl_map_in_bounds(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    if (x >= map->width)
+        return false;
+    return y < map->height;
+}
+
+/* True when (x, y) is in bounds and its tile satisfies RL_IS_PASSABLE.
+ * Out-of-bounds coordinates are never passable. */
+bool rl_map_is_passable(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    if (!rl_map_in_bounds(map, x, y))
+        return 0;
+
+    return RL_IS_PASSABLE(map->tiles[y * map->width + x], x, y);
+}
+
+/* True when the tile at (x, y) satisfies RL_IS_OPAQUE.
+ * Out-of-bounds coordinates are always treated as opaque. */
+bool rl_map_is_opaque(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    if (rl_map_in_bounds(map, x, y)) {
+        return RL_IS_OPAQUE(map->tiles[y * map->width + x], x, y);
+    }
+
+    return true;
+}
+
+/* Return a pointer to the tile stored at (x, y), or NULL when the coordinates
+ * are out of bounds. */
+RL_Byte *rl_map_tile(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    if (!rl_map_in_bounds(map, x, y))
+        return NULL;
+
+    return map->tiles + (x + y*map->width);
+}
+
+/* True when (x, y) holds a wall tile (per RL_IS_WALL_TILE) and at least one of
+ * its eight neighbours is passable according to RL_PASSABLE_F.
+ * Out-of-bounds coordinates are never walls. */
+bool rl_map_is_wall(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    /* neighbour offsets, probed in this order */
+    static const int offsets[8][2] = {
+        { 0,  1}, { 0, -1}, { 1,  0}, {-1,  0},
+        { 1, -1}, {-1, -1}, { 1,  1}, {-1,  1}
+    };
+    unsigned int i;
+
+    if (!rl_map_in_bounds(map, x, y))
+        return 0;
+    if (!RL_IS_WALL_TILE(map->tiles[x + y*map->width], x, y))
+        return 0;
+
+    for (i = 0; i < 8; ++i) {
+        /* unsigned wrap-around at the map edge is intentional: the wrapped
+         * coordinate is what the neighbour probe receives */
+        if (RL_PASSABLE_F(map, x + (unsigned int) offsets[i][0], y + (unsigned int) offsets[i][1]))
+            return 1;
+    }
+
+    return 0;
+}
+
+/* True when the tile directly above or directly below (x, y) is in bounds and
+ * is not a wall tile (per RL_IS_WALL_TILE). */
+static bool rl_map_wall_connects_ew(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    const unsigned int north = y - 1;
+    const unsigned int south = y + 1;
+
+    if (rl_map_in_bounds(map, x, north) && !RL_IS_WALL_TILE(map->tiles[x+north*map->width], x, north))
+        return true;
+    if (rl_map_in_bounds(map, x, south) && !RL_IS_WALL_TILE(map->tiles[x+south*map->width], x, south))
+        return true;
+    return false;
+}
+/* True when the tile directly left or directly right of (x, y) is in bounds
+ * and is not a wall tile (per RL_IS_WALL_TILE). */
+static bool rl_map_wall_connects_ns(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    const unsigned int west = x - 1;
+    const unsigned int east = x + 1;
+
+    if (rl_map_in_bounds(map, west, y) && !RL_IS_WALL_TILE(map->tiles[west+y*map->width], west, y))
+        return true;
+    if (rl_map_in_bounds(map, east, y) && !RL_IS_WALL_TILE(map->tiles[east+y*map->width], east, y))
+        return true;
+    return false;
+}
+
+/* Compute the connection mask of the wall at (x, y).
+ *
+ * Returns 0 when RL_WALL_F reports that (x, y) is not a wall. Otherwise a
+ * direction bit (RL_WallToEast/West/North/South) is set for each orthogonal
+ * neighbour that is a wall tile, provided either this tile or that neighbour
+ * has a non-wall tile on the perpendicular axis. A wall with no such
+ * connection yields RL_WallOther. */
+RL_Byte rl_map_wall(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    RL_Byte connections = 0;
+
+    if (!RL_WALL_F(map, x, y))
+        return connections;
+
+    /* east */
+    if (rl_map_in_bounds(map, x + 1, y) && RL_IS_WALL_TILE(map->tiles[(x+1)+y*map->width], x + 1, y)) {
+        if (rl_map_wall_connects_ew(map, x, y) || rl_map_wall_connects_ew(map, x + 1, y))
+            connections |= RL_WallToEast;
+    }
+    /* west */
+    if (rl_map_in_bounds(map, x - 1, y) && RL_IS_WALL_TILE(map->tiles[(x-1)+y*map->width], x - 1, y)) {
+        if (rl_map_wall_connects_ew(map, x, y) || rl_map_wall_connects_ew(map, x - 1, y))
+            connections |= RL_WallToWest;
+    }
+    /* north */
+    if (rl_map_in_bounds(map, x, y - 1) && RL_IS_WALL_TILE(map->tiles[x+(y-1)*map->width], x, y - 1)) {
+        if (rl_map_wall_connects_ns(map, x, y) || rl_map_wall_connects_ns(map, x, y - 1))
+            connections |= RL_WallToNorth;
+    }
+    /* south */
+    if (rl_map_in_bounds(map, x, y + 1) && RL_IS_WALL_TILE(map->tiles[x+(y+1)*map->width], x, y + 1)) {
+        if (rl_map_wall_connects_ns(map, x, y) || rl_map_wall_connects_ns(map, x, y + 1))
+            connections |= RL_WallToSouth;
+    }
+
+    if (connections == 0)
+        return RL_WallOther;
+    return connections;
+}
+
+/* True when the wall mask from rl_map_wall for (x, y) contains at least one
+ * horizontal connection (east or west) together with at least one vertical
+ * connection (north or south). */
+bool rl_map_is_corner_wall(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    const int wall = rl_map_wall(map, x, y);
+    const bool horizontal = (wall & RL_WallToWest) || (wall & RL_WallToEast);
+    const bool vertical = (wall & RL_WallToNorth) || (wall & RL_WallToSouth);
+
+    return horizontal && vertical;
+}
+
+/* True when (x, y) is in bounds and the tile stored there equals `tile`. */
+bool rl_map_tile_is(const RL_Map *map, unsigned int x, unsigned int y, RL_Byte tile)
+{
+    return rl_map_in_bounds(map, x, y) && map->tiles[x + y*map->width] == tile;
+}
+
+/* True when (x, y) is a wall according to RL_WALL_F and at least one of its
+ * eight neighbours is an RL_TileRoom tile. */
+bool rl_map_is_room_wall(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    /* neighbour offsets, probed in this order */
+    static const int offsets[8][2] = {
+        { 0,  1}, { 0, -1}, { 1,  0}, {-1,  0},
+        { 1, -1}, {-1, -1}, { 1,  1}, {-1,  1}
+    };
+    unsigned int i;
+
+    if (!RL_WALL_F(map, x, y))
+        return 0;
+
+    for (i = 0; i < 8; ++i) {
+        if (rl_map_tile_is(map, x + (unsigned int) offsets[i][0], y + (unsigned int) offsets[i][1], RL_TileRoom))
+            return 1;
+    }
+    return 0;
+}
+
+/* Compute the connection mask of the room wall at (x, y).
+ *
+ * Returns 0 when (x, y) is not a room wall. Otherwise a direction bit is set
+ * for every orthogonal neighbour that is itself a room wall; if none is,
+ * RL_WallOther is returned. */
+RL_Byte rl_map_room_wall(const RL_Map *map, unsigned int x, unsigned int y)
+{
+    RL_Byte connections = 0;
+
+    if (!rl_map_is_room_wall(map, x, y))
+        return 0;
+
+    if (rl_map_is_room_wall(map, x + 1, y)) {
+        connections |= RL_WallToEast;
+    }
+    if (rl_map_is_room_wall(map, x - 1, y)) {
+        connections |= RL_WallToWest;
+    }
+    if (rl_map_is_room_wall(map, x, y - 1)) {
+        connections |= RL_WallToNorth;
+    }
+    if (rl_map_is_room_wall(map, x, y + 1)) {
+        connections |= RL_WallToSouth;
+    }
+
+    if (connections == 0)
+        return RL_WallOther;
+    return connections;
+}
+
+/* Default random number source: returns a value in [min, max] drawn from rand().
+ *
+ * When the range is invalid (max < min, or max not below RAND_MAX/UINT_MAX)
+ * or degenerate (min == max), min is returned. */
+unsigned int rl_rng_generate(unsigned int min, unsigned int max)
+{
+    int sample;
+    unsigned int span, bucket;
+
+    RL_ASSERT(max >= min);
+    RL_ASSERT(max < RAND_MAX);
+    RL_ASSERT(max < UINT_MAX);
+
+    if (max <= min || max >= RAND_MAX || max >= UINT_MAX)
+        return min;
+
+    sample = rand();
+    if (sample < 0) sample = -sample; /* fixes issue on LLVM MOS */
+
+    /* scale by bucket size instead of taking a remainder:
+     * produces more uniformity than using mod */
+    span = max - min + 1;
+    bucket = RAND_MAX / span + 1;
+    return min + sample / bucket;
+}
+
+/* Allocate a zero-initialised BSP root node covering width x height.
+ * Returns NULL when the allocation fails. */
+RL_BSP *rl_bsp_create(unsigned int width, unsigned int height)
+{
+    RL_BSP *node;
+
+    RL_ASSERT(width > 0 && height > 0);
+    node = (RL_BSP*) RL_CALLOC(1, sizeof(*node));
+    if (node != NULL) {
+        node->width = width;
+        node->height = height;
+    }
+
+    return node;
+}
+/* Free a BSP node together with its whole subtree. A NULL root is ignored. */
+void rl_bsp_destroy(RL_BSP* root)
+{
+    if (root == NULL)
+        return;
+
+    /* children first; the recursive call itself tolerates NULL */
+    rl_bsp_destroy(root->left);
+    root->left = NULL;
+    rl_bsp_destroy(root->right);
+    root->right = NULL;
+
+    RL_FREE(root);
+}
+
+/* Split an unsplit node into two children at `position`.
+ *
+ * RL_SplitVertically cuts along the height: left keeps the first `position`
+ * rows and right gets the remainder. Any other direction cuts along the
+ * width in the same way. The call does nothing when the node already has a
+ * child, when `position` is not smaller than the dimension being cut, or when
+ * allocating either child fails. */
+void rl_bsp_split(RL_BSP *node, unsigned int position, RL_SplitDirection direction)
+{
+    RL_BSP *first, *second;
+
+    /* can't split something already split */
+    RL_ASSERT(node->left == NULL && node->right == NULL);
+
+    if (node->left != NULL || node->right != NULL)
+        return;
+
+    if (direction == RL_SplitVertically && position >= node->height)
+        return;
+    if (direction == RL_SplitHorizontally && position >= node->width)
+        return;
+
+    first = (RL_BSP*) RL_CALLOC(1, sizeof(RL_BSP));
+    if (first == NULL)
+        return;
+    second = (RL_BSP*) RL_CALLOC(1, sizeof(RL_BSP));
+    if (second == NULL) {
+        RL_FREE(first);
+        return;
+    }
+
+    /* both halves start as a copy of the parent's rectangle ... */
+    first->x = node->x;
+    first->y = node->y;
+    first->width = node->width;
+    first->height = node->height;
+    second->x = node->x;
+    second->y = node->y;
+    second->width = node->width;
+    second->height = node->height;
+
+    /* ... and are then trimmed along the cut axis */
+    if (direction == RL_SplitVertically) {
+        first->height = position;
+        second->height = node->height - position;
+        second->y = node->y + position;
+    } else {
+        first->width = position;
+        second->width = node->width - position;
+        second->x = node->x + position;
+    }
+
+    first->parent = node;
+    second->parent = node;
+    node->left = first;
+    node->right = second;
+}
+
+/* Recursively halve `root` until a node is too small or the depth limit is hit.
+ *
+ * A node is split horizontally only while its width is at least 2*min_width
+ * and vertically only while its height is at least 2*min_height; the preferred
+ * direction is chosen with RL_RNG_F. `max_recursion` bounds the depth.
+ *
+ * Returns RL_OK on success (including "nothing left to split"),
+ * RL_ErrorNullParameter for a NULL root and RL_ErrorMemory when a split could
+ * not be created. On failure the children created for `root` by this call are
+ * destroyed again, subtrees included. */
+RL_Status rl_bsp_recursive_split(RL_BSP *root, unsigned int min_width, unsigned int min_height, unsigned int max_recursion)
+{
+    unsigned int width, height, split_position;
+    RL_SplitDirection dir;
+    RL_BSP *left, *right;
+    RL_Status ret;
+
+    RL_ASSERT(root);
+    RL_ASSERT(min_width > 0 && min_height > 0 && root != NULL);
+    RL_ASSERT(min_width <= root->width && min_height <= root->height);
+
+    if (root == NULL)
+        return RL_ErrorNullParameter;
+    if (max_recursion <= 0)
+        return RL_OK;
+
+    width = root->width;
+    height = root->height;
+
+    /* determine split dir & split */
+    if (RL_RNG_F(0, 1)) {
+        if (width < min_width*2)
+            dir = RL_SplitVertically;
+        else
+            dir = RL_SplitHorizontally;
+    } else {
+        if (height < min_height*2)
+            dir = RL_SplitHorizontally;
+        else
+            dir = RL_SplitVertically;
+    }
+
+    if (dir == RL_SplitHorizontally) {
+        /* cannot split if current node size is too small - end splitting */
+        if (width < min_width*2)
+            return RL_OK;
+        split_position = width / 2;
+    } else {
+        /* cannot split if current node size is too small - end splitting */
+        if (height < min_height*2)
+            return RL_OK;
+        split_position = height / 2;
+    }
+
+    rl_bsp_split(root, split_position, dir);
+
+    /* continue recursion */
+    left = root->left;
+    right = root->right;
+
+    if (left == NULL || right == NULL)
+        return RL_ErrorMemory;
+
+    ret = rl_bsp_recursive_split(left, min_width, min_height, max_recursion - 1);
+    if (ret == RL_OK)
+        ret = rl_bsp_recursive_split(right, min_width, min_height, max_recursion - 1);
+
+    if (ret != RL_OK) {
+        /* destroy whole subtrees: by the time the right half fails, the left
+         * half may already own children that a plain RL_FREE would leak */
+        rl_bsp_destroy(left);
+        rl_bsp_destroy(right);
+        root->left = root->right = NULL;
+    }
+
+    return ret;
+}
+
+/* True when `node` is non-NULL and has neither a left nor a right child. */
+bool rl_bsp_is_leaf(const RL_BSP *node)
+{
+    return node != NULL && node->left == NULL && node->right == NULL;
+}
+
+/* Return the other child of `node`'s parent, or NULL when `node` is NULL or
+ * has no parent. Asserts if the parent does not list `node` as a child. */
+RL_BSP *rl_bsp_sibling(const RL_BSP *node)
+{
+    const RL_BSP *parent;
+
+    if (node == NULL || node->parent == NULL)
+        return NULL;
+
+    parent = node->parent;
+    if (parent->left == node)
+        return parent->right;
+    if (parent->right == node)
+        return parent->left;
+
+    RL_ASSERT("BSP structure is invalid" && 0); /* BSP structure is invalid */
+    return NULL;
+}
+
+/* Follow left children from `node`, adding one to `depth` per step, and return
+ * the node reached when `depth` becomes 0. Returns NULL if the chain of left
+ * children ends first. */
+RL_BSP *rl_bsp_next_node_recursive_down(RL_BSP *node, int depth)
+{
+    while (node != NULL && depth != 0) {
+        node = node->left;
+        depth += 1;
+    }
+    return node;
+}
+/* Climb towards the root, subtracting one from `depth` per level, until the
+ * current node is a left child; then descend from that parent's right child
+ * via rl_bsp_next_node_recursive_down. Returns NULL when the root is reached
+ * without finding such a node. */
+RL_BSP *rl_bsp_next_node_recursive(RL_BSP *node, int depth)
+{
+    while (node != NULL && node->parent != NULL) {
+        if (node->parent->left == node) /* traverse back down */
+            return rl_bsp_next_node_recursive_down(node->parent->right, depth);
+        node = node->parent;
+        depth -= 1;
+    }
+    return NULL;
+}
+/* Return the node that follows `node` in the traversal implemented by
+ * rl_bsp_next_node_recursive, starting at relative depth 0. Returns NULL for
+ * a NULL node, for a node without a parent, or when there is no next node. */
+RL_BSP *rl_bsp_next_node(RL_BSP *node)
+{
+    /* climb until we are a left child, then go back down; the helper itself
+     * yields NULL for a NULL or parentless node */
+    return rl_bsp_next_node_recursive(node, 0);
+}
+
+/* Follow left children from `node` until a leaf is reached and return it.
+ * Returns NULL when `node` is NULL or the chain of left children ends on a
+ * node that is not a leaf. */
+RL_BSP *rl_bsp_next_leaf_recursive_down(RL_BSP *node)
+{
+    while (node != NULL && !rl_bsp_is_leaf(node)) {
+        node = node->left;
+    }
+    return node;
+}
+/* Climb towards the root until the current node is a left child, then return
+ * the left-most leaf below that parent's right child. Returns NULL when the
+ * root is reached first. */
+RL_BSP *rl_bsp_next_leaf_recursive(const RL_BSP *node)
+{
+    while (node != NULL && node->parent != NULL) {
+        if (node->parent->left == node) /* traverse back down */
+            return rl_bsp_next_leaf_recursive_down(node->parent->right);
+        node = node->parent;
+    }
+    return NULL;
+}
+/* Return the leaf that follows `node`, or NULL when `node` is NULL, has no
+ * parent, or is the last leaf. `node` is asserted to be a leaf. */
+RL_BSP *rl_bsp_next_leaf(const RL_BSP *node)
+{
+    if (node != NULL && node->parent != NULL) {
+        RL_ASSERT(rl_bsp_is_leaf(node));
+
+        /* climb until we are a left child, then go back down */
+        return rl_bsp_next_leaf_recursive(node);
+    }
+
+    return NULL;
+}
+/* Walk down from `root`, choosing the left or right child with RL_RNG_F(0, 1)
+ * at every node, and return the leaf that is reached. Returns NULL for a
+ * NULL root. */
+RL_BSP* rl_bsp_random_leaf(const RL_BSP *root)
+{
+    const RL_BSP *cursor = root;
+
+    if (cursor == NULL)
+        return NULL;
+
+    while (!rl_bsp_is_leaf(cursor))
+        cursor = RL_RNG_F(0, 1) ? cursor->left : cursor->right;
+
+    return (RL_BSP*) cursor;
+}
+
+/* Count the leaves of the tree rooted at `root`.
+ *
+ * `root` is asserted to have no parent. The count starts at the node reached
+ * by following left children from the root and then advances with
+ * rl_bsp_next_leaf. Returns 0 for a NULL root. */
+size_t rl_bsp_leaf_count(const RL_BSP *root)
+{
+    size_t count; /* same type as the return value; avoids an int -> size_t conversion */
+    const RL_BSP *node;
+    if (root == NULL) return 0;
+    RL_ASSERT(root->parent == NULL);
+    /* find first leaf */
+    node = root;
+    while (node->left != NULL) {
+        node = node->left;
+    }
+    /* count leaves */
+    count = 1;
+    while ((node = rl_bsp_next_leaf(node)) != NULL) {
+        count++;
+    }
+    return count;
+}
+
+/* Draw a room_width x room_height rectangle whose top-left corner is
+ * (room_x, room_y): the outermost ring of tiles becomes RL_TileRock, the
+ * interior becomes RL_TileRoom. The rectangle is asserted to fit in the map. */
+static void rl_map_bsp_generate_room(RL_Map *map, unsigned int room_width, unsigned int room_height, unsigned int room_x, unsigned int room_y)
+{
+    unsigned int col, row;
+    unsigned int last_col, last_row;
+
+    RL_ASSERT(map && room_width + room_x <= map->width);
+    RL_ASSERT(map && room_height + room_y <= map->height);
+    if (map == NULL) return;
+
+    last_col = room_x + room_width - 1;
+    last_row = room_y + room_height - 1;
+    for (col = room_x; col < room_x + room_width; ++col) {
+        for (row = room_y; row < room_y + room_height; ++row) {
+            /* set sides of room to walls */
+            const bool on_edge = col == room_x || col == last_col ||
+                                 row == room_y || row == last_row;
+            map->tiles[row*map->width + col] = on_edge ? RL_TileRock : RL_TileRoom;
+        }
+    }
+}
+/* Place one room inside a single BSP leaf. The room size is drawn with
+ * RL_RNG_F from the given ranges and clamped so that the room plus padding on
+ * both sides fits in the leaf. */
+static void rl_map_bsp_generate_leaf_room(const RL_BSP *leaf, RL_Map *map, unsigned int room_min_width, unsigned int room_max_width, unsigned int room_min_height, unsigned int room_max_height, unsigned int room_padding)
+{
+    unsigned int room_width, room_height, room_x, room_y;
+
+    room_width = RL_RNG_F(room_min_width, room_max_width);
+    if (room_width + room_padding*2 > leaf->width)
+        room_width = leaf->width - room_padding*2;
+    room_height = RL_RNG_F(room_min_height, room_max_height);
+    if (room_height + room_padding*2 > leaf->height)
+        room_height = leaf->height - room_padding*2;
+#if(RL_MAPGEN_BSP_RANDOMISE_ROOM_LOC)
+    room_x = RL_RNG_F(leaf->x + room_padding, leaf->x + leaf->width - room_width - room_padding);
+    room_y = RL_RNG_F(leaf->y + room_padding, leaf->y + leaf->height - room_height - room_padding);
+#else
+    room_x = leaf->x + leaf->width/2 - room_width/2 - room_padding/2;
+    room_y = leaf->y + leaf->height/2 - room_height/2 - room_padding/2;
+#endif
+
+    rl_map_bsp_generate_room(map, room_width, room_height, room_x, room_y);
+}
+
+/* Walk the BSP tree below `node` and draw one room in every leaf that is a
+ * child of some node. Non-leaf children are recursed into. Does nothing when
+ * `map` is NULL. */
+static void rl_map_bsp_generate_rooms(RL_BSP *node, RL_Map *map, unsigned int room_min_width, unsigned int room_max_width, unsigned int room_min_height, unsigned int room_max_height, unsigned int room_padding)
+{
+    RL_ASSERT(map);
+    RL_ASSERT(room_min_width < room_max_width);
+    RL_ASSERT(room_min_height < room_max_height);
+    RL_ASSERT(room_max_width + room_padding*2 < UINT_MAX);
+    RL_ASSERT(room_max_height + room_padding*2 < UINT_MAX);
+    RL_ASSERT(room_min_width > 2 && room_min_height > 2); /* width of 2 can end up having rooms made of nothing but walls */
+    RL_ASSERT(node && room_min_width < node->width);
+    RL_ASSERT(node && room_min_height < node->height);
+    RL_ASSERT(node && room_max_width <= node->width);
+    RL_ASSERT(node && room_max_height <= node->height);
+    if (map == NULL) return;
+    if (node && node->left) {
+        if (rl_bsp_is_leaf(node->left)) {
+            rl_map_bsp_generate_leaf_room(node->left, map, room_min_width, room_max_width, room_min_height, room_max_height, room_padding);
+        } else {
+            rl_map_bsp_generate_rooms(node->left, map, room_min_width, room_max_width, room_min_height, room_max_height, room_padding);
+        }
+    }
+    if (node && node->right) {
+        /* test the right child itself: checking the left child here would drop
+         * rooms whenever only one of the two siblings is a leaf */
+        if (rl_bsp_is_leaf(node->right)) {
+            rl_map_bsp_generate_leaf_room(node->right, map, room_min_width, room_max_width, room_min_height, room_max_height, room_padding);
+        } else {
+            rl_map_bsp_generate_rooms(node->right, map, room_min_width, room_max_width, room_min_height, room_max_height, room_padding);
+        }
+    }
+}
+
+/* Generate a BSP dungeon over the whole map.
+ *
+ * Creates a temporary BSP root sized like the map, resets every tile to
+ * RL_TileRock, runs rl_mapgen_bsp_ex and destroys the tree again.
+ * Returns RL_ErrorMemory when `map` is NULL or the BSP root cannot be
+ * allocated; otherwise the status of rl_mapgen_bsp_ex. */
+RL_Status rl_mapgen_bsp(RL_Map *map, RL_MapgenConfigBSP config)
+{
+    RL_BSP *tree;
+    RL_Status status;
+
+    RL_ASSERT(map);
+    if (!map)
+        return RL_ErrorMemory;
+
+    tree = rl_bsp_create(map->width, map->height);
+    RL_ASSERT(tree);
+    if (!tree)
+        return RL_ErrorMemory;
+
+    memset(map->tiles, RL_TileRock, sizeof(*map->tiles)*map->width*map->height);
+    status = rl_mapgen_bsp_ex(map, tree, &config);
+    rl_bsp_destroy(tree);
+
+    return status;
+}
+
+/* Generate a BSP dungeon into `map` using a caller-supplied BSP root.
+ *
+ * Steps: split `root` recursively (minimum node size is the maximum room
+ * size plus padding), draw a room per leaf, then connect the rooms with
+ * rl_mapgen_connect_corridors.
+ * Returns RL_ErrorNullParameter when any pointer argument is NULL, otherwise
+ * the first non-OK status of the split or corridor step, or RL_OK. */
+RL_Status rl_mapgen_bsp_ex(RL_Map *map, RL_BSP *root, const RL_MapgenConfigBSP *config)
+{
+    RL_Status status;
+
+    RL_ASSERT(map);
+    RL_ASSERT(root);
+    RL_ASSERT(root->width > 0 && root->height > 0);
+    RL_ASSERT(root->x < root->width && root->y < root->height);
+    RL_ASSERT(config);
+    RL_ASSERT(config->room_min_width > 0 && config->room_max_width >= config->room_min_width && config->room_min_height > 0 && config->room_max_height >= config->room_min_height);
+    RL_ASSERT(config->room_max_width <= map->width && config->room_max_height <= map->height);
+    RL_ASSERT(config->max_splits > 0);
+
+    if (!map || !root || !config)
+        return RL_ErrorNullParameter;
+
+    status = rl_bsp_recursive_split(root, config->room_max_width + config->room_padding, config->room_max_height + config->room_padding, config->max_splits);
+    if (status == RL_OK) {
+        rl_map_bsp_generate_rooms(root, map, config->room_min_width, config->room_max_width, config->room_min_height, config->room_max_height, config->room_padding);
+        status = rl_mapgen_connect_corridors(map, root, config->draw_doors, config->draw_corridors);
+    }
+
+    /* if (config->use_secret_passages) { */
+        /* TODO connect secret passages */
+    /* } */
+
+    return status;
+}
+
+/* Find the room inside a BSP leaf and report the middle of it.
+ *
+ * The leaf's rectangle is scanned column by column. The first RL_TileRoom
+ * tile seen is taken as the start and the last one seen as the end; *dx/*dy
+ * receive the midpoint between the two. When the leaf contains no
+ * RL_TileRoom tile at all, *dx and *dy are left untouched. */
+void rl_bsp_find_room(RL_Map *map, RL_BSP *leaf, unsigned int *dx, unsigned int *dy)
+{
+    unsigned int x, y;
+    unsigned int start_x = 0, start_y = 0, end_x = 0, end_y = 0;
+    int diff_x, diff_y;
+    bool found_start = false;
+    RL_ASSERT(dx && dy);
+    RL_ASSERT(map);
+    RL_ASSERT(leaf);
+    for (x = leaf->x; x < leaf->width + leaf->x; ++x) {
+        for (y = leaf->y; y < leaf->height + leaf->y; ++y) {
+            if (!rl_map_tile_is(map, x, y, RL_TileRoom))
+                continue;
+            if (!found_start) {
+                start_x = x;
+                start_y = y;
+                found_start = true;
+            }
+            end_x = x;
+            end_y = y;
+        }
+    }
+
+    if (!found_start)
+        return;
+
+    /* computed once after the scan so the result is also written when the
+     * room extends to the last tile scanned */
+    diff_x = end_x - start_x;
+    diff_y = end_y - start_y;
+    RL_ASSERT(diff_x >= 0 && diff_y >= 0);
+    *dx = start_x + diff_x/2;
+    *dy = start_y + diff_y/2;
+}
+
+/* A cell counts as alive when it is outside the map or holds RL_TileRock. */
+bool rl_mapgen_automata_is_alive(const RL_Map *map, int x, int y)
+{
+    return !rl_map_in_bounds(map, x, y) || rl_map_tile_is(map, x, y, RL_TileRock);
+}
+/* Count how many of the eight cells surrounding (x, y) are alive according to
+ * rl_mapgen_automata_is_alive. The cell itself is not counted. */
+unsigned int rl_mapgen_automata_alive_neighbors(const RL_Map *map, int x, int y)
+{
+    unsigned int alive = 0;
+    int dx, dy;
+
+    for (dy = -1; dy <= 1; ++dy) {
+        for (dx = -1; dx <= 1; ++dx) {
+            if (dx == 0 && dy == 0)
+                continue;
+            if (rl_mapgen_automata_is_alive(map, x + dx, y + dy))
+                ++alive;
+        }
+    }
+
+    return alive;
+}
+
+/* Run the cellular-automata generator over the whole map: forwards to
+ * rl_mapgen_automata_ex with a zero offset and the map's full size.
+ * `map` is dereferenced unconditionally and must not be NULL. */
+RL_Status rl_mapgen_automata(RL_Map *map, RL_MapgenConfigAutomata config)
+{
+    const unsigned int full_width = map->width;
+    const unsigned int full_height = map->height;
+
+    return rl_mapgen_automata_ex(map, 0, 0, full_width, full_height, &config);
+}
+
+#if RL_ENABLE_PATHFINDING
+static inline float rl_mapgen_corridor_scorer(const RL_GraphNode *current, const RL_GraphNode *neighbor, void *context);
+#endif
+/* Cellular-automata cave generator restricted to the rectangle
+ * [offset_x, offset_x + width) x [offset_y, offset_y + height).
+ *
+ * 1. Every cell becomes RL_TileRock with probability
+ *    config->chance_cell_initialized percent, RL_TileRoom otherwise.
+ * 2. config->max_iterations passes apply the birth/survival thresholds.
+ * 3. Optionally (config->draw_corridors) the separate open regions are linked
+ *    with corridors, (config->fill_border) the rectangle's outer ring is
+ *    reset to rock, and (config->cull_unconnected) everything outside the
+ *    largest connected area is filled with rock.
+ *
+ * Returns RL_ErrorNullParameter for a NULL map/config, RL_ErrorMemory when
+ * the corridor heap cannot be created, RL_ErrorMapgenInvalidConfig when an
+ * option needs pathfinding but RL_ENABLE_PATHFINDING is off, else RL_OK. */
+RL_Status rl_mapgen_automata_ex(RL_Map *map, unsigned int offset_x, unsigned int offset_y, unsigned int width, unsigned int height,  const RL_MapgenConfigAutomata *config)
+{
+    unsigned int i, x, y;
+
+    RL_ASSERT(map && config);
+    RL_ASSERT(width > 0 && height > 0);
+    RL_ASSERT(offset_x < width && offset_y < height);
+    RL_ASSERT(offset_x < map->width && offset_y < map->height);
+    RL_ASSERT(offset_x + width <= map->width && offset_y + height <= map->height);
+    RL_ASSERT(config->chance_cell_initialized > 0 && config->chance_cell_initialized <= 100);
+
+    if (map == NULL || config == NULL) {
+        return RL_ErrorNullParameter;
+    }
+
+    /* initialize map */
+    for (x=offset_x; x<offset_x + width; ++x) {
+        for (y=offset_y; y<offset_y + height; ++y) {
+            unsigned int r = RL_RNG_F(1, 100);
+            if (r <= config->chance_cell_initialized) {
+                map->tiles[x + y*map->width] = RL_TileRock;
+            } else {
+                map->tiles[x + y*map->width] = RL_TileRoom;
+            }
+        }
+    }
+
+    /* cellular automata algorithm */
+    for (i=config->max_iterations; i>0; i--) {
+        for (x=offset_x; x<offset_x + width; ++x) {
+            for (y=offset_y; y<offset_y + height; ++y) {
+                unsigned int alive_neighbors = rl_mapgen_automata_alive_neighbors(map, x, y);
+                if (!rl_mapgen_automata_is_alive(map, x, y) && alive_neighbors >= config->birth_threshold) {
+                    /* cell isn't alive but has enough alive neighbors to be born */
+                    map->tiles[x + y*map->width] = RL_TileRock;
+                } else if (rl_mapgen_automata_is_alive(map, x, y) && alive_neighbors >= config->survival_threshold) {
+                    /* cell is alive and has enough alive neighbors to survive */
+                } else {
+                    /* cell dies */
+                    map->tiles[x + y*map->width] = RL_TileRoom;
+                }
+            }
+        }
+    }
+
+    if (config->draw_corridors) {
+#if RL_ENABLE_PATHFINDING
+        /* A very crude algorithm for connecting corridors within the cellular automata. This creates a heap of Dijkstra
+         * graphs, containing each floodfilled region of the map. Then, it goes through each of these regions and
+         * connects it to another random region. This is pretty slow and can be optimized in the future, but works for
+         * now. */
+
+        RL_Heap *heap = rl_heap_create(1, NULL);
+        /* the heap is dereferenced below, so bail out if it could not be created */
+        if (heap == NULL) {
+            return RL_ErrorMemory;
+        }
+        /* fill floodfills array with a floodfill of each connected space */
+        for (x=offset_x; x<offset_x + width; ++x) {
+            for (y=offset_y; y<offset_y + height; ++y) {
+                int i;
+                bool is_scored = false;
+                if (RL_PASSABLE_F(map, x, y)) {
+                    for (i=0; i<heap->len; ++i) {
+                        RL_Graph *floodfill = (RL_Graph*) heap->heap[i];
+                        if (rl_graph_is_scored(floodfill, RL_XY(x, y))) {
+                            is_scored = true;
+                            break;
+                        }
+                    }
+                    if (!is_scored) {
+                        RL_Graph *floodfill = rl_dijkstra_create(map, RL_XY(x, y), NULL);
+                        rl_heap_insert(heap, floodfill);
+                    }
+                }
+            }
+        }
+        /* connect each floodfill with another random one */
+        if (heap->len > 1) {
+            int i;
+            RL_Graph *graph = rl_graph_create_ex(map, map, NULL, 0);
+            RL_ASSERT(graph);
+            for (i=0; i<heap->len; ++i) {
+                RL_Graph *floodfill_target;
+                RL_Graph *floodfill;
+                RL_Point dig_start, dig_end;
+                size_t node_idx;
+                int j = i;
+                floodfill = (RL_Graph*) heap->heap[i];
+                /* find a random target node to connect to */
+                while (j == i) {
+                    j = RL_RNG_F(0, heap->len - 1);
+                }
+                floodfill_target = (RL_Graph*) heap->heap[j];
+                RL_ASSERT(floodfill && floodfill_target);
+                /* find start & end point for corridor pathfinding */
+                for (node_idx=0; node_idx<floodfill->length; ++node_idx) {
+                    RL_GraphNode *n = &floodfill->nodes[node_idx];
+                    RL_ASSERT(n);
+                    if (n->score < FLT_MAX && RL_PASSABLE_F(map, n->point.x, n->point.y)) {
+                        dig_start = n->point;
+                        break;
+                    }
+                }
+                RL_ASSERT(RL_PASSABLE_F(map, dig_start.x, dig_start.y));
+                for (node_idx=0; node_idx<floodfill_target->length; ++node_idx) {
+                    RL_GraphNode *n = &floodfill_target->nodes[node_idx];
+                    RL_ASSERT(n);
+                    if (n->score < FLT_MAX && RL_PASSABLE_F(map, n->point.x, n->point.y)) {
+                        dig_end = n->point;
+                        break;
+                    }
+                }
+                RL_ASSERT(RL_PASSABLE_F(map, dig_end.x, dig_end.y));
+                RL_ASSERT(!(dig_start.x == dig_end.x && dig_start.y == dig_end.y));
+                /* carve out corridors */
+                rl_dijkstra_score_ex(graph, dig_end, rl_mapgen_corridor_scorer, map);
+                RL_Path *path = rl_path_create_from_graph(graph, dig_start);
+                RL_ASSERT(path);
+                while ((path = rl_path_walk(path))) {
+                    if (rl_map_tile_is(map, path->point.x, path->point.y, RL_TileRock)) {
+                        map->tiles[(size_t)floor(path->point.x) + (size_t)floor(path->point.y) * map->width] = RL_TileCorridor;
+                    }
+                }
+            }
+            rl_graph_destroy(graph);
+        }
+        /* cleanup */
+        RL_Graph *floodfill = NULL;
+        while ((floodfill = (RL_Graph*) rl_heap_pop(heap))) {
+            rl_graph_destroy(floodfill);
+        }
+        rl_heap_destroy(heap);
+#else
+        return RL_ErrorMapgenInvalidConfig;
+#endif
+    }
+    if (config->fill_border) {
+        /* the border follows the generated rectangle, so the offsets are part
+         * of every coordinate (identical to the old result when both are 0) */
+        const unsigned int last_x = offset_x + width - 1;
+        const unsigned int last_y = offset_y + height - 1;
+        for (y=offset_y; y<=last_y; ++y) {
+            map->tiles[offset_x + y*map->width] = RL_TileRock;
+            map->tiles[last_x + y*map->width] = RL_TileRock;
+        }
+        for (x=offset_x; x<=last_x; ++x) {
+            map->tiles[x + offset_y*map->width] = RL_TileRock;
+            map->tiles[x + last_y*map->width] = RL_TileRock;
+        }
+    }
+    if (config->cull_unconnected) {
+#if RL_ENABLE_PATHFINDING
+        RL_Graph *floodfill = rl_graph_floodfill_largest_area(map);
+        if (floodfill) {
+            for (x=offset_x; x<offset_x + width; ++x) {
+                for (y=offset_y; y<offset_y + height; ++y) {
+                    if (!rl_graph_is_scored(floodfill, RL_XY(x, y))) {
+                        map->tiles[x + y*map->width] = RL_TileRock;
+                    }
+                }
+            }
+            rl_graph_destroy(floodfill);
+        }
+#else
+        return RL_ErrorMapgenInvalidConfig;
+#endif
+    }
+
+    return RL_OK;
+}
+
+/* Integer tile coordinate used by the maze generator. */
+typedef struct {
+    int x;
+    int y;
+} RL_MapPoint;
+
+/* Collect the cells two steps west, east, north and south of (x, y) that lie
+ * inside [sx, mx) x [sy, my) and still hold RL_TileRock.
+ * The matches are written to `ps` in that order; returns how many (0..4). */
+int rl_mapgen_maze_unvisited_neighbors(RL_MapPoint ps[4], const RL_Map *map, int x, int y, int sx, int mx, int sy, int my)
+{
+    static const int step_x[4] = { -2, 2, 0, 0 };
+    static const int step_y[4] = { 0, 0, -2, 2 };
+    int found = 0;
+    int k;
+
+    for (k = 0; k < 4; ++k) {
+        const int nx = x + step_x[k];
+        const int ny = y + step_y[k];
+        const bool inside = nx >= sx && nx < mx && ny >= sy && ny < my;
+
+        if (inside && map->tiles[nx + ny*map->width] == RL_TileRock) {
+            /* matching neighbor */
+            ps[found].x = nx;
+            ps[found].y = ny;
+            ++found;
+        }
+    }
+
+    return found;
+}
+
+/* Generate a maze over the whole map: every tile is reset to RL_TileRock and
+ * rl_mapgen_maze_ex then carves inside the area that excludes a one-tile
+ * border. Returns RL_ErrorNullParameter for a NULL map. */
+RL_Status rl_mapgen_maze(RL_Map *map)
+{
+    RL_ASSERT(map);
+    if (!map)
+        return RL_ErrorNullParameter;
+
+    RL_ASSERT(map->width > 2 && map->height > 2);
+    memset(map->tiles, RL_TileRock, sizeof(*map->tiles) * map->width * map->height);
+
+    return rl_mapgen_maze_ex(map, 1, 1, map->width - 2, map->height - 2);
+}
+
+/* Carve a maze into the rectangle
+ * [offset_x, offset_x + width) x [offset_y, offset_y + height).
+ *
+ * The rectangle is reset to RL_TileRock, a random start cell is turned into
+ * RL_TileCorridor, and cells popped from a heap are repeatedly connected to a
+ * random rock cell two steps away, carving the tile in between as well.
+ *
+ * Returns RL_ErrorNullParameter for a NULL map, RL_ErrorMemory when either
+ * work buffer cannot be allocated (nothing is leaked in that case), else
+ * RL_OK. */
+RL_Status rl_mapgen_maze_ex(RL_Map *map, unsigned int offset_x, unsigned int offset_y, unsigned int width, unsigned int height)
+{
+    int x, y;
+    RL_MapPoint *ps;
+    RL_MapPoint *p;
+    RL_Heap *heap;
+
+    RL_ASSERT(map);
+    RL_ASSERT(width > 0 && height > 0);
+    RL_ASSERT(offset_x < width && offset_y < height);
+    RL_ASSERT(offset_x < map->width && offset_y < map->height);
+    RL_ASSERT(offset_x + width <= map->width && offset_y + height <= map->height);
+    RL_ASSERT(offset_x + width < INT_MAX);
+    RL_ASSERT(offset_y + height < INT_MAX);
+
+    if (map == NULL) {
+        return RL_ErrorNullParameter;
+    }
+
+    /* reset all tiles within range to rock */
+    for (x = (int)offset_x; x < (int)offset_x + (int)width; ++x) {
+        for (y = (int)offset_y; y < (int)offset_y + (int)height; ++y) {
+            map->tiles[x + y*map->width] = RL_TileRock;
+        }
+    }
+
+    /* allocate memory for BFS */
+    heap = rl_heap_create(width * height, NULL);
+    /* one slot per map tile so a point can be addressed by its tile index */
+    ps = (RL_MapPoint*) RL_MALLOC(sizeof(*ps) * map->width * map->height);
+
+    RL_ASSERT(ps && heap);
+    if (ps == NULL || heap == NULL) {
+        /* release whichever of the two allocations did succeed */
+        if (heap != NULL) rl_heap_destroy(heap);
+        if (ps != NULL) RL_FREE(ps);
+        return RL_ErrorMemory;
+    }
+
+    /* choose random starting tile */
+    x = RL_RNG_F(offset_x, offset_x + width - 1);
+    y = RL_RNG_F(offset_y, offset_y + height - 1);
+    map->tiles[x + y*map->width] = RL_TileCorridor;
+    p = &ps[x + y*map->width];
+    p->x = x;
+    p->y = y;
+    rl_heap_insert(heap, p);
+    while ((p = (RL_MapPoint*) rl_heap_pop(heap)) != NULL) {
+        /* check unvisited neighbors (+2 so we have enough space for walls) */
+        RL_MapPoint neighbors[4];
+        RL_MapPoint *p2;
+        int wall_x, wall_y, i;
+        int neighbors_count = rl_mapgen_maze_unvisited_neighbors(neighbors, map, p->x, p->y, offset_x, offset_x + width, offset_y, offset_y + height);
+        if (neighbors_count == 0) continue;
+        /* choose one unvisited neighbor */
+        i = RL_RNG_F(0, neighbors_count - 1);
+        x = neighbors[i].x;
+        y = neighbors[i].y;
+        RL_ASSERT(rl_map_in_bounds(map, x, y));
+        RL_ASSERT(map->tiles[x + y*map->width] == RL_TileRock);
+        /* unvisited neighbor - remove wall and push to heap */
+        wall_x = x;
+        wall_y = y;
+        if (x < p->x) wall_x = x + 1;
+        if (x > p->x) wall_x = x - 1;
+        if (y < p->y) wall_y = y + 1;
+        if (y > p->y) wall_y = y - 1;
+        map->tiles[wall_x + wall_y*map->width] = RL_TileCorridor;
+        map->tiles[x + y*map->width] = RL_TileCorridor;
+        p2 = &ps[x + y*map->width];
+        p2->x = x;
+        p2->y = y;
+        /* the current cell goes back on the heap: it may still have other
+         * unvisited neighbors */
+        rl_heap_insert(heap, p);
+        rl_heap_insert(heap, p2);
+    }
+
+    /* free memory for BFS */
+    rl_heap_destroy(heap);
+    RL_FREE(ps);
+
+    return RL_OK;
+}
+
+/**
+ * Simple corridor connection for a BSP tree.
+ *
+ * Picks one random leaf below each child of `root`, digs a corridor with at most
+ * one bend between the two leaves and then recurses into both children. Does
+ * nothing when `map` or `root` is NULL or when `root` lacks one of its children.
+ *
+ * @param map        map whose tiles are carved
+ * @param root       BSP (sub)tree whose two children get connected
+ * @param draw_doors when true, rock tiles for which rl_map_is_room_wall() holds
+ *                   become doors instead of corridor tiles
+ */
+void rl_mapgen_connect_corridors_simple(RL_Map *map, RL_BSP *root, bool draw_doors)
+{
+    unsigned int from_x, from_y, to_x, to_y, x, y;
+    int vertical, dist_x, dist_y;
+    RL_BSP *first, *second, *leaf_a, *leaf_b;
+
+    RL_ASSERT(map && root);
+    if (map == NULL || root == NULL) return;
+
+    /* both children are needed to have something to connect */
+    first = root->left;
+    second = root->right;
+    if (first == NULL || second == NULL) return;
+
+    /* pick the two leaves that get joined and a point inside each */
+    leaf_a = rl_bsp_random_leaf(first);
+    leaf_b = rl_bsp_random_leaf(second);
+#if RL_MAPGEN_BSP_RANDOMISE_ROOM_LOC
+    rl_bsp_find_room(map, leaf_a, &from_x, &from_y);
+    rl_bsp_find_room(map, leaf_b, &to_x, &to_y);
+#else
+    from_x = leaf_a->x + leaf_a->width / 2;
+    from_y = leaf_a->y + leaf_a->height / 2;
+    to_x = leaf_b->x + leaf_b->width / 2;
+    to_y = leaf_b->y + leaf_b->height / 2;
+#endif
+    RL_ASSERT(RL_PASSABLE_F(map, from_x, from_y));
+    RL_ASSERT(RL_PASSABLE_F(map, to_x, to_y));
+    RL_ASSERT(!(from_x == to_x && from_y == to_y));
+
+    /* begin along the axis with the larger distance (horizontal on a tie) */
+    x = from_x;
+    y = from_y;
+    dist_y = y - to_y;
+    if (dist_y < 0) dist_y = -dist_y;
+    dist_x = x - to_x;
+    if (dist_x < 0) dist_x = -dist_x;
+    vertical = dist_y > dist_x;
+
+    while (x != to_x || y != to_y) {
+        unsigned int step_x = x;
+        unsigned int step_y = y;
+        size_t tile;
+
+        /* at most one axis advances per iteration, so the corridor stays one tile wide */
+        if (vertical == 0) { /* horizontal leg */
+            if (x == to_x) {
+                vertical = 1; /* aligned - continue with the vertical leg right away */
+            } else if (to_x < x) {
+                step_x = x - 1;
+            } else {
+                step_x = x + 1;
+            }
+        }
+        if (vertical == 1) { /* vertical leg */
+            if (y == to_y) {
+                vertical = 0; /* aligned - the horizontal leg resumes next iteration */
+            } else if (to_y < y) {
+                step_y = y - 1;
+            } else {
+                step_y = y + 1;
+            }
+        }
+
+        /* carve the current tile, but only if it is still solid rock */
+        tile = x + y*map->width;
+        if (map->tiles[tile] == RL_TileRock) {
+            if (draw_doors && rl_map_is_room_wall(map, x, y)) {
+                map->tiles[tile] = RL_TileDoor;
+            } else {
+                map->tiles[tile] = RL_TileCorridor;
+            }
+        }
+        x = step_x;
+        y = step_y;
+    }
+
+    /* repeat for the subtrees below both children */
+    rl_mapgen_connect_corridors_simple(map, first, draw_doors);
+    rl_mapgen_connect_corridors_simple(map, second, draw_doors);
+}
+
+
+/* defined further down, inside the RL_ENABLE_PATHFINDING section */
+void rl_mapgen_connect_corridors_bsp(RL_Map *map, RL_BSP *root, bool draw_doors);
+void rl_mapgen_connect_corridors_randomly(RL_Map *map, RL_BSP *root, bool draw_doors);
+
+/**
+ * Runs the corridor connection algorithm selected by `connection_algorithm`.
+ *
+ * RL_ConnectNone does nothing, RL_ConnectSimple needs no pathfinding. The random
+ * and BSP variants are only compiled in when RL_ENABLE_PATHFINDING is set; without
+ * it their case labels fall through to the error return. After connecting
+ * randomly, every tile left unscored by rl_graph_floodfill_largest_area() is
+ * turned back into rock.
+ *
+ * @return RL_OK, or RL_ErrorMapgenInvalidConfig for an unknown/unavailable algorithm
+ */
+RL_Status rl_mapgen_connect_corridors(RL_Map *map, RL_BSP *root, bool draw_doors, RL_MapgenCorridorConnection connection_algorithm)
+{
+    RL_Status status = RL_OK;
+
+    switch (connection_algorithm) {
+        case RL_ConnectNone:
+            RL_UNUSED(map);
+            RL_UNUSED(root);
+            RL_UNUSED(draw_doors);
+            break;
+        case RL_ConnectSimple:
+            rl_mapgen_connect_corridors_simple(map, root, draw_doors);
+            break;
+        case RL_ConnectRandomly:
+#if RL_ENABLE_PATHFINDING
+            rl_mapgen_connect_corridors_randomly(map, root, draw_doors);
+            {
+                /* keep only the largest connected area */
+                RL_Graph *largest = rl_graph_floodfill_largest_area(map);
+                RL_ASSERT(largest);
+                if (largest != NULL) {
+                    for (size_t row = 0; row < map->height; ++row) {
+                        for (size_t col = 0; col < map->width; ++col) {
+                            size_t tile = col + row*map->width;
+                            if (largest->nodes[tile].score != FLT_MAX) {
+                                continue;
+                            }
+                            /* not reached by the flood fill - turn it back into rock */
+                            map->tiles[tile] = RL_TileRock;
+                        }
+                    }
+                    rl_graph_destroy(largest);
+                }
+            }
+            break;
+#endif
+        case RL_ConnectBSP:
+#if RL_ENABLE_PATHFINDING
+            rl_mapgen_connect_corridors_bsp(map, root, draw_doors);
+            break;
+#endif
+        default:
+            status = RL_ErrorMapgenInvalidConfig;
+            break;
+    }
+
+    return status;
+}
+
+/**
+ * Heap functions for pathfinding
+ *
+ * Ref: https://gist.github.com/skeeto/f012a207aff1753662b679917f706de6
+ */
+
+/* Fallback comparison installed by rl_heap_create() when the caller passes NULL:
+ * ignores both items and always returns 1. */
+static int rl_heap_noop_comparison_f(const void *_a, const void *_b)
+{
+    RL_UNUSED(_b);
+    RL_UNUSED(_a);
+
+    return 1;
+}
+
+/**
+ * Allocates an empty heap.
+ *
+ * @param capacity     initial number of item slots. Values below 1 still trip the
+ *                     assertion, but are raised to 1 in release builds so the item
+ *                     array is never zero-sized and can always be doubled later.
+ * @param comparison_f ordering callback; NULL selects rl_heap_noop_comparison_f
+ * @return the new heap, or NULL when an allocation fails. Release it with
+ *         rl_heap_destroy().
+ */
+RL_Heap *rl_heap_create(int capacity, int (*comparison_f)(const void *heap_item_a, const void *heap_item_b))
+{
+    RL_Heap *heap;
+
+    RL_ASSERT(capacity > 0);
+    if (capacity < 1) {
+        capacity = 1;
+    }
+
+    heap = (RL_Heap*) RL_MALLOC(sizeof(*heap));
+    RL_ASSERT(heap);
+    if (heap == NULL) {
+        return NULL;
+    }
+    heap->heap = (void**) RL_MALLOC(sizeof(*heap->heap) * (size_t) capacity);
+    RL_ASSERT(heap->heap);
+    if (heap->heap == NULL) {
+        RL_FREE(heap);
+        return NULL;
+    }
+
+    if (comparison_f == NULL) {
+        comparison_f = rl_heap_noop_comparison_f;
+    }
+
+    heap->cap = capacity;
+    heap->comparison_f = comparison_f;
+    heap->len = 0;
+
+    return heap;
+}
+
+/* Frees the heap structure and its item array (the stored items are left alone).
+ * Passing NULL is allowed. */
+void rl_heap_destroy(RL_Heap *h)
+{
+    if (h == NULL) {
+        return;
+    }
+    if (h->heap != NULL) {
+        RL_FREE(h->heap);
+    }
+    RL_FREE(h);
+}
+
+/* Number of items currently stored in the heap; 0 for a NULL heap. */
+int rl_heap_length(const RL_Heap *h)
+{
+    return h != NULL ? h->len : 0;
+}
+
+/**
+ * Adds `item` to the heap, doubling the item array when it is full.
+ *
+ * @param h    heap to insert into
+ * @param item pointer stored as-is; ordering is decided by the heap's comparison_f
+ * @return true on success; false when `h` is NULL or the array could not grow.
+ *         On failure the heap is left untouched and still owned by the caller, so
+ *         it can be popped from and must still be destroyed as usual.
+ */
+bool rl_heap_insert(RL_Heap *h, void *item)
+{
+    int i;
+    RL_ASSERT(h != NULL);
+    if (h == NULL) return false;
+
+    if (h->len == h->cap) {
+        /* resize the heap */
+        int new_cap;
+        void **heap_items;
+        if (h->cap > INT_MAX / 2) {
+            return false; /* doubling would overflow */
+        }
+        new_cap = h->cap > 0 ? h->cap * 2 : 1;
+        heap_items = (void**) RL_REALLOC(h->heap, sizeof(void*) * (size_t) new_cap);
+        RL_ASSERT(heap_items);
+        if (heap_items == NULL) {
+            /* the old array is still valid after a failed realloc - do not free the
+             * heap here, callers keep using (and destroying) their handle */
+            return false;
+        }
+        h->heap = heap_items;
+        h->cap = new_cap;
+    }
+
+    /* append, then sift up until comparison_f accepts the parent/child order */
+    h->heap[h->len] = item;
+    for (i = h->len++; i;) {
+        void *tmp;
+        int p = (i - 1) / 2;
+        if (h->comparison_f(h->heap[p], h->heap[i])) {
+            break;
+        }
+        tmp = h->heap[p];
+        h->heap[p] = h->heap[i];
+        h->heap[i] = tmp;
+        i = p;
+    }
+    return true;
+}
+
+/* Removes the item at `index`: the last item takes its slot and is sifted down
+ * as long as comparison_f prefers one of its children. */
+static void rl_heap_remove(RL_Heap *h, int index)
+{
+    int cur;
+    RL_ASSERT(h);
+    if (h == NULL) {
+        return;
+    }
+
+    h->len -= 1;
+    h->heap[index] = h->heap[h->len];
+    cur = index;
+    for (;;) {
+        int left = 2*cur + 1;
+        int right = 2*cur + 2;
+        int best = cur;
+        void *held;
+        if (left < h->len && h->comparison_f(h->heap[left], h->heap[best])) best = left;
+        if (right < h->len && h->comparison_f(h->heap[right], h->heap[best])) best = right;
+        if (best == cur) break;
+        held = h->heap[best];
+        h->heap[best] = h->heap[cur];
+        h->heap[cur] = held;
+        cur = best;
+    }
+}
+
+/* Removes and returns the item at the top of the heap; NULL when the heap is
+ * NULL or empty. */
+void *rl_heap_pop(RL_Heap *h)
+{
+    void *top;
+
+    if (h == NULL || !h->len) {
+        return NULL;
+    }
+
+    RL_ASSERT(h->heap);
+    top = h->heap[0];
+    rl_heap_remove(h, 0);
+    return top;
+}
+
+/* Returns the item at the top of the heap without removing it; NULL when the
+ * heap is NULL or empty. */
+void *rl_heap_peek(RL_Heap *h)
+{
+    if (h == NULL) {
+        return NULL;
+    }
+
+    RL_ASSERT(h->heap);
+    return h->len ? h->heap[0] : NULL;
+}
+
+#if RL_ENABLE_PATHFINDING
+/* Cheap distance intended for side-by-side nodes: 0 for identical points, 1 when
+ * the points share an x or a y coordinate, 1.4 otherwise (presumably standing in
+ * for a diagonal step). */
+static float rl_distance_simple(RL_Point node, RL_Point end)
+{
+    bool same_x = node.x == end.x;
+    bool same_y = node.y == end.y;
+
+    if (same_x && same_y) return 0;
+    if (same_x || same_y) return 1;
+    return 1.4;
+}
+
+/* Heap ordering for graph nodes: returns 1 when the first node's score is
+ * strictly lower than the second node's score, 0 otherwise. */
+static int rl_scored_graph_heap_comparison(const void *heap_item_a, const void *heap_item_b)
+{
+    const RL_GraphNode *lhs = (const RL_GraphNode*) heap_item_a;
+    const RL_GraphNode *rhs = (const RL_GraphNode*) heap_item_b;
+
+    if (lhs->score < rhs->score) {
+        return 1;
+    }
+    return 0;
+}
+
+/* Allocates a single path element for point `p` with no successor.
+ * Returns NULL when the allocation fails. */
+RL_Path *rl_path(RL_Point p)
+{
+    RL_Path *element = (RL_Path*) RL_MALLOC(sizeof(*element));
+    RL_ASSERT(element);
+    if (element != NULL) {
+        element->point = p;
+        element->next = NULL;
+    }
+
+    return element;
+}
+
+/* Manhattan distance: sum of the absolute x and y differences of both points. */
+float rl_distance_manhattan(RL_Point node, RL_Point end)
+{
+    double span_x = fabs(node.x - end.x);
+    double span_y = fabs(node.y - end.y);
+
+    return span_x + span_y;
+}
+
+/* Euclidean distance: length of the straight line between both points. */
+float rl_distance_euclidian(RL_Point node, RL_Point end)
+{
+    float dx = node.x - end.x;
+    float dy = node.y - end.y;
+
+    return sqrt(dx * dx + dy * dy);
+}
+
+/* Chebyshev distance: the larger of the absolute x and y differences. */
+float rl_distance_chebyshev(RL_Point node, RL_Point end)
+{
+    float span_x = fabs(node.x - end.x);
+    float span_y = fabs(node.y - end.y);
+
+    if (span_x > span_y) {
+        return span_x;
+    }
+    return span_y;
+}
+
+/**
+ * Dijkstra scorer used while carving corridors (`context` is the RL_Map).
+ *
+ * The base cost is the current node's score plus the Manhattan distance to the
+ * neighbor. Doors get no surcharge (they are passable but count as "walls", so
+ * passing through them is encouraged); corner walls cost 99 extra and other
+ * walls 9 extra to discourage double wide corridors and carving into walls twice.
+ */
+static inline float rl_mapgen_corridor_scorer(const RL_GraphNode *current, const RL_GraphNode *neighbor, void *context)
+{
+    RL_Map *map = (RL_Map*) context;
+    RL_Point from = current->point;
+    RL_Point to = neighbor->point;
+    float cost = current->score + rl_distance_manhattan(from, to);
+
+    if (!rl_map_tile_is(map, to.x, to.y, RL_TileDoor)) {
+        if (rl_map_is_corner_wall(map, to.x, to.y)) {
+            cost = cost + 99;
+        } else if (RL_WALL_F(map, to.x, to.y)) {
+            cost = cost + 9;
+        }
+    }
+
+    return cost;
+}
+
+/**
+ * Connects one random leaf below each child of `root` with a corridor that
+ * follows the Dijkstra scores produced by rl_mapgen_corridor_scorer, then
+ * recurses into both children. `graph` is re-scored for every connection.
+ * Returns without doing anything when an argument is NULL or a child is missing.
+ */
+void rl_mapgen_connect_corridors_bsp_recursive(RL_Map *map, RL_BSP *root, bool draw_doors, RL_Graph *graph)
+{
+    RL_ASSERT(map && root && graph);
+    if (!map || !root || !graph) return;
+
+    RL_BSP *first = root->left;
+    RL_BSP *second = root->right;
+    if (!first || !second) return;
+
+    /* one room position per side */
+    unsigned int room_x, room_y;
+    RL_BSP *leaf = rl_bsp_random_leaf(first);
+    rl_bsp_find_room(map, leaf, &room_x, &room_y);
+    RL_Point from = {room_x, room_y};
+    RL_ASSERT(RL_PASSABLE_F(map, from.x, from.y));
+    leaf = rl_bsp_random_leaf(second);
+    rl_bsp_find_room(map, leaf, &room_x, &room_y);
+    RL_Point to = {room_x, room_y};
+    RL_ASSERT(RL_PASSABLE_F(map, to.x, to.y));
+    RL_ASSERT(!(from.x == to.x && from.y == to.y));
+
+    /* score the graph towards `to`, then follow the scores starting at `from` */
+    rl_dijkstra_score_ex(graph, to, rl_mapgen_corridor_scorer, map);
+    RL_Path *step = rl_path_create_from_graph(graph, from);
+    RL_ASSERT(step);
+    /* rl_path_walk() frees the element it is given, so the first element (the
+     * start point) is skipped and the path is released while it is walked */
+    for (step = rl_path_walk(step); step != NULL; step = rl_path_walk(step)) {
+        if (!rl_map_tile_is(map, step->point.x, step->point.y, RL_TileRock)) {
+            continue;
+        }
+        size_t tile = (size_t)floor(step->point.x) + (size_t)floor(step->point.y) * map->width;
+        if (rl_map_is_room_wall(map, step->point.x, step->point.y) && draw_doors) {
+            map->tiles[tile] = RL_TileDoor;
+        } else {
+            map->tiles[tile] = RL_TileCorridor;
+        }
+    }
+
+    /* descend into both subtrees */
+    rl_mapgen_connect_corridors_bsp_recursive(map, first, draw_doors, graph);
+    rl_mapgen_connect_corridors_bsp_recursive(map, second, draw_doors, graph);
+}
+/* Entry point of the BSP corridor connection: builds one graph over the whole
+ * map (no passability filter, no diagonal neighbors), hands it to the recursive
+ * worker and frees it afterwards. Does nothing when the graph cannot be created. */
+void rl_mapgen_connect_corridors_bsp(RL_Map *map, RL_BSP *root, bool draw_doors)
+{
+    RL_Graph *shared = rl_graph_create_ex(map, map, NULL, 0);
+    RL_ASSERT(shared);
+    if (shared == NULL) {
+        return;
+    }
+    rl_mapgen_connect_corridors_bsp_recursive(map, root, draw_doors, shared);
+    rl_graph_destroy(shared);
+}
+
+/**
+ * Connects every leaf of the BSP tree to one randomly chosen other leaf.
+ *
+ * Starting at the left-most leaf and advancing with rl_bsp_next_leaf(), a random
+ * different leaf is picked for each leaf and a corridor is carved between points
+ * found by rl_bsp_find_room(), following Dijkstra scores from
+ * rl_mapgen_corridor_scorer.
+ *
+ * Returns without changing the map when `map`/`root` is NULL, when the tree is a
+ * single leaf (there is no other leaf to connect to), or when the graph cannot
+ * be allocated.
+ *
+ * @param draw_doors when true, carved rock tiles for which rl_map_is_room_wall()
+ *                   holds become doors instead of corridor tiles
+ */
+void rl_mapgen_connect_corridors_randomly(RL_Map *map, RL_BSP *root, bool draw_doors)
+{
+    RL_ASSERT(map && root);
+    if (!map || !root) return;
+
+    /* with a single leaf the "find a different leaf" search below could never
+     * succeed, so there is nothing to do */
+    if (rl_bsp_is_leaf(root)) return;
+
+    /* find deepest left-most node */
+    RL_BSP *leftmost_node = root;
+    while (leftmost_node->left != NULL) {
+        leftmost_node = leftmost_node->left;
+    }
+    RL_ASSERT(leftmost_node && rl_bsp_is_leaf(leftmost_node));
+    RL_BSP *node = leftmost_node;
+    RL_Graph *graph = rl_graph_create_ex(map, map, NULL, 0);
+    RL_ASSERT(graph);
+    if (graph == NULL) return;
+    while (node) {
+        RL_BSP *sibling;
+
+        /* find random sibling (any leaf other than the current one) */
+        while ((sibling = rl_bsp_random_leaf(root)) == node) {}
+        RL_ASSERT(sibling);
+
+        /* TODO need to change this to find the *actual* room (e.g. what if the user provides a map filled with "."?) */
+        unsigned int x, y;
+        rl_bsp_find_room(map, node, &x, &y);
+        RL_ASSERT(RL_PASSABLE_F(map, x, y));
+        RL_Point dig_start = {x, y};
+        rl_bsp_find_room(map, sibling, &x, &y);
+        RL_ASSERT(RL_PASSABLE_F(map, x, y));
+        RL_Point dig_end = {x, y};
+        RL_ASSERT(!(dig_start.x == dig_end.x && dig_start.y == dig_end.y));
+
+        /* carve out corridors; rl_path_walk() frees each element as it advances */
+        rl_dijkstra_score_ex(graph, dig_end, rl_mapgen_corridor_scorer, map);
+        RL_Path *path = rl_path_create_from_graph(graph, dig_start);
+        RL_ASSERT(path);
+        while ((path = rl_path_walk(path))) {
+            if (rl_map_tile_is(map, path->point.x, path->point.y, RL_TileRock)) {
+                if (rl_map_is_room_wall(map, path->point.x, path->point.y) && draw_doors) {
+                    map->tiles[(size_t)floor(path->point.x) + (size_t)floor(path->point.y) * map->width] = RL_TileDoor;
+                } else {
+                    map->tiles[(size_t)floor(path->point.x) + (size_t)floor(path->point.y) * map->width] = RL_TileCorridor;
+                }
+            }
+        }
+
+        /* find start node for next loop iteration */
+        node = rl_bsp_next_leaf(node);
+    }
+
+    rl_graph_destroy(graph);
+}
+
+
+/**
+ * Finds the largest connected passable area of the map.
+ *
+ * For every passable tile not yet covered by an earlier fill, a Dijkstra graph
+ * is created from that tile; the graph with the most scored nodes is kept.
+ *
+ * @return the graph of the largest area (nodes outside of it have a score of
+ *         FLT_MAX), or NULL when `map` is NULL, an allocation fails or no
+ *         passable tile exists. Free the result with rl_graph_destroy().
+ */
+RL_Graph *rl_graph_floodfill_largest_area(const RL_Map *map)
+{
+    RL_ASSERT(map);
+    if (!map) return NULL;
+
+    /* one flag per tile: already part of some flood fill? */
+    int *seen = (int*) RL_CALLOC(map->width * map->height, sizeof(*seen));
+    RL_ASSERT(seen);
+    if (!seen) return NULL;
+
+    RL_Graph *largest = NULL;
+    int largest_count = 0;
+    for (unsigned int x = 0; x < map->width; ++x) {
+        for (unsigned int y = 0; y < map->height; ++y) {
+            if (!RL_PASSABLE_F(map, x, y) || seen[x + y*map->width]) {
+                continue;
+            }
+
+            RL_Graph *area = rl_dijkstra_create(map, RL_XY(x, y), NULL);
+            RL_ASSERT(area);
+            if (area == NULL) {
+                RL_FREE(seen);
+                if (largest) {
+                    rl_graph_destroy(largest);
+                }
+                return NULL;
+            }
+
+            /* count the tiles reached from here and mark them as handled */
+            int area_count = 0;
+            for (size_t i = 0; i < area->length; i++) {
+                if (area->nodes[i].score == FLT_MAX) {
+                    continue;
+                }
+                seen[i] = 1;
+                area_count++;
+            }
+
+            if (area_count <= largest_count) {
+                rl_graph_destroy(area);
+                continue;
+            }
+            /* new record holder replaces the previous one */
+            largest_count = area_count;
+            if (largest) {
+                rl_graph_destroy(largest);
+            }
+            largest = area;
+        }
+    }
+
+    RL_FREE(seen);
+
+    return largest;
+}
+
+
+/**
+ * Creates a path of points on the line from `a` to `b`, advancing by `step` per
+ * element along the dominant axis (a Bresenham-style error term decides when the
+ * other axis advances as well).
+ *
+ * Coordinates never move past `b`: a coordinate that would overshoot is snapped
+ * to the target, so the walk also ends when `step` does not evenly divide the
+ * distance.
+ *
+ * @param a    first point of the path
+ * @param b    last point of the path
+ * @param step distance per element; must be greater than 0 unless a equals b
+ * @return head of the path (free with rl_path_destroy()), or NULL when an
+ *         allocation fails or `step` is not positive while a and b differ
+ */
+RL_Path *rl_line_create(RL_Point a, RL_Point b, float step)
+{
+    float delta_x = fabs(a.x - b.x);
+    float x_increment = b.x > a.x ? step : -step;
+    float delta_y = fabs(a.y - b.y);
+    float y_increment = b.y > a.y ? step : -step;
+    float error = 0.0;
+    float slope = delta_x ? delta_y / delta_x : 0.0;
+
+    RL_Path *head = rl_path(a);
+    if (head == NULL) return NULL;
+
+    /* a zero, negative or NaN step can never reach b */
+    if (!(step > 0) && (a.x != b.x || a.y != b.y)) {
+        rl_path_destroy(head);
+        return NULL;
+    }
+
+    RL_Path *path = head;
+    while (path->point.x != b.x || path->point.y != b.y) {
+        RL_Point point = path->point;
+
+        if (delta_x > delta_y) {
+            error += slope;
+            if (error > 0.5 && point.y != b.y) {
+                error -= 1.0;
+                point.y += y_increment;
+            }
+
+            point.x += x_increment;
+        } else {
+            /* slope is 0 only for a vertical line here, where x never has to move */
+            if (slope != 0) {
+                error += 1/slope;
+            }
+            if (error > 0.5 && point.x != b.x) {
+                error -= 1.0;
+                point.x += x_increment;
+            }
+
+            point.y += y_increment;
+        }
+
+        /* snap coordinates that stepped past the target back onto it */
+        if ((x_increment > 0 && point.x > b.x) || (x_increment < 0 && point.x < b.x)) {
+            point.x = b.x;
+        }
+        if ((y_increment > 0 && point.y > b.y) || (y_increment < 0 && point.y < b.y)) {
+            point.y = b.y;
+        }
+
+        /* add new member to linked list & advance */
+        path->next = rl_path(point);
+        if (path->next == NULL) {
+            rl_path_destroy(head);
+            return NULL;
+        }
+        path = path->next;
+    }
+
+    return head;
+}
+
+/* Convenience wrapper: scores a temporary Dijkstra graph towards `end`, builds
+ * the path from `start` along those scores and frees the graph again.
+ * Returns NULL when the graph cannot be created. */
+RL_Path *rl_path_create(const RL_Map *map, RL_Point start, RL_Point end, RL_DistanceFun distance_f)
+{
+    RL_Path *result = NULL;
+    RL_Graph *scored = rl_dijkstra_create(map, end, distance_f);
+    RL_ASSERT(scored);
+    if (scored != NULL) {
+        result = rl_path_create_from_graph(scored, start);
+        RL_ASSERT(result);
+        rl_graph_destroy(scored);
+    }
+
+    return result;
+}
+
+/**
+ * Builds a path from `start` by repeatedly stepping to the lowest scored
+ * neighbor until a node with a score of 0 (or a dead end) is reached.
+ *
+ * @param graph scored graph, e.g. from rl_dijkstra_score()
+ * @param start first point of the path; if no graph node has these coordinates
+ *              the path consists of `start` only
+ * @return head of the path (free with rl_path_destroy() or by walking it with
+ *         rl_path_walk()), or NULL on invalid arguments or allocation failure.
+ *         Nothing is leaked on the NULL return.
+ */
+RL_Path *rl_path_create_from_graph(const RL_Graph *graph, RL_Point start)
+{
+    RL_Path *path = rl_path(start);
+    RL_Path *path_start = path;
+    RL_GraphNode *node = NULL;
+    RL_ASSERT(path);
+    RL_ASSERT(graph && graph->nodes);
+    if (path == NULL || graph == NULL || graph->nodes == NULL) {
+        /* release the start element if it was allocated (NULL is fine here) */
+        rl_path_destroy(path);
+        return NULL;
+    }
+    for (size_t i=0; i<graph->length; i++) {
+        if (graph->nodes[i].point.x == start.x && graph->nodes[i].point.y == start.y) {
+            node = &graph->nodes[i];
+        }
+    }
+    if (node == NULL) {
+        return path;
+    }
+    while (node != NULL && node->score > 0) {
+        node = rl_graph_node_lowest_neighbor(node);
+        if (node == NULL) break;
+        path->next = rl_path(node->point);
+        RL_ASSERT(path->next);
+        if (path->next == NULL) {
+            /* free the whole list from its head, not just the current tail */
+            rl_path_destroy(path_start);
+            return NULL;
+        }
+        path = path->next;
+    }
+
+    return path_start;
+}
+
+/* Frees the given path element and returns its successor (NULL at the end of
+ * the path or when `path` is NULL). */
+RL_Path *rl_path_walk(RL_Path *path)
+{
+    RL_Path *rest = NULL;
+    if (path != NULL) {
+        rest = path->next;
+        path->next = NULL;
+        RL_FREE(path);
+    }
+
+    return rest;
+}
+
+/* Frees `path` and every element after it; NULL is allowed. */
+void rl_path_destroy(RL_Path *path)
+{
+    while (path != NULL) {
+        path = rl_path_walk(path);
+    }
+}
+
+/* Default passability callback for graphs: `context` is the RL_Map and the
+ * answer is whatever RL_PASSABLE_F reports for the tile. */
+bool rl_graph_default_passable_fun(void *context, unsigned int x, unsigned int y)
+{
+    RL_Map *tile_map = (RL_Map*) context;
+
+    if (RL_PASSABLE_F(tile_map, x, y)) {
+        return true;
+    }
+    return false;
+}
+
+/* Creates a graph over `map` using the default passability check and allowing
+ * diagonal neighbors. */
+RL_Graph *rl_graph_create(const RL_Map *map)
+{
+    void *context = (void*) map;
+
+    return rl_graph_create_ex(map, context, rl_graph_default_passable_fun, true);
+}
+
+/**
+ * Creates a graph with one node per map tile and links every node to its
+ * neighboring tiles.
+ *
+ * @param map                      map providing width and height
+ * @param context                  passed through to `passable_f`
+ * @param passable_f               optional filter; a neighbor is only linked when
+ *                                 this returns true for it (NULL links everything).
+ *                                 It is only called for in-bounds coordinates.
+ * @param allow_diagonal_neighbors link the four diagonal tiles in addition to the
+ *                                 four orthogonal ones
+ * @return the new graph with all scores set to FLT_MAX, or NULL when `map` is
+ *         NULL or an allocation fails. Free with rl_graph_destroy().
+ */
+RL_Graph *rl_graph_create_ex(const RL_Map *map, void *context, RL_PassableFun passable_f, bool allow_diagonal_neighbors)
+{
+    /* neighbor offsets as {dx, dy}; the four orthogonal directions come first */
+    static const int offsets[8][2] = {
+        { 1,  0}, {-1,  0}, { 0,  1}, { 0, -1},
+        { 1,  1}, { 1, -1}, {-1,  1}, {-1, -1}
+    };
+
+    RL_ASSERT(map);
+    if (map == NULL) return NULL;
+
+    RL_Graph *graph = (RL_Graph*) RL_MALLOC(sizeof(*graph));
+    RL_ASSERT(graph);
+    if (graph == NULL) return NULL;
+    size_t length = map->width * map->height;
+    RL_GraphNode *nodes = (RL_GraphNode*) RL_CALLOC(length, sizeof(*nodes));
+    RL_ASSERT(nodes != NULL);
+    if (nodes == NULL) {
+        RL_FREE(graph);
+        return NULL;
+    }
+
+    const int neighbor_count = allow_diagonal_neighbors ? 8 : 4;
+    for (unsigned int x=0; x<map->width; x++) {
+        for (unsigned int y=0; y<map->height; y++) {
+            RL_GraphNode *node = &nodes[x + y*map->width];
+            node->point.x = (float) x;
+            node->point.y = (float) y;
+            node->neighbors_length = 0;
+            node->score = FLT_MAX;
+            /* calculate neighbors */
+            for (int i=0; i<neighbor_count; i++) {
+                /* stay in signed integers: at the left/top edge the coordinate is
+                 * -1, which must be rejected before it is converted to an unsigned
+                 * type (converting a negative float is undefined behavior) */
+                int nx = (int)x + offsets[i][0];
+                int ny = (int)y + offsets[i][1];
+                if (nx < 0 || ny < 0)
+                    continue;
+                if (!rl_map_in_bounds(map, nx, ny))
+                    continue;
+                if (passable_f && !passable_f(context, (unsigned int) nx, (unsigned int) ny))
+                    continue;
+
+                node->neighbors[node->neighbors_length] = &nodes[(size_t) nx + (size_t) ny * map->width];
+                node->neighbors_length++;
+            }
+        }
+    }
+
+    graph->length = length;
+    graph->nodes = nodes;
+
+    return graph;
+}
+
+/* Adds the scores of `graph_b` onto the scores of `graph`, node by node. A sum
+ * that would exceed FLT_MAX is stored as FLT_MAX. Both graphs are expected to
+ * have the same length. */
+void rl_graph_add(RL_Graph *graph, const RL_Graph *graph_b)
+{
+    RL_ASSERT(graph != NULL);
+    RL_ASSERT(graph_b != NULL);
+    RL_ASSERT(graph->length == graph_b->length);
+    for (size_t i=0; i < graph->length; i++) {
+        RL_GraphNode *target = graph->nodes + i;
+        const RL_GraphNode *source = graph_b->nodes + i;
+        if (!(target->score <= FLT_MAX - source->score)) {
+            target->score = FLT_MAX;
+            continue;
+        }
+        target->score += source->score;
+    }
+}
+
+/* Multiplies the score of every node by `coefficient` (asserted to lie within
+ * 0 and 1). */
+void rl_graph_weight(RL_Graph *graph, float coefficient)
+{
+    RL_ASSERT(graph != NULL);
+    RL_ASSERT(coefficient <= 1 && coefficient >= 0);
+    size_t index = 0;
+    while (index < graph->length) {
+        graph->nodes[index].score *= coefficient;
+        index++;
+    }
+}
+
+/* Creates a graph for `map` with rl_graph_create() and scores it from `start`
+ * with rl_dijkstra_score(). The result is whatever rl_graph_create() returned. */
+RL_Graph *rl_dijkstra_create(const RL_Map *map,
+                            RL_Point start,
+                            RL_DistanceFun distance_f)
+{
+    RL_Graph *scored = rl_graph_create(map);
+
+    rl_dijkstra_score(scored, start, distance_f);
+    return scored;
+}
+
+/* Context for rl_dijkstra_default_score_f: wraps the distance function to use. */
+struct rl_score_context {
+    RL_DistanceFun fun;
+};
+
+/* Default scorer function for Dijkstra: expects a struct rl_score_context as
+ * context and returns the current node's score plus the wrapped distance
+ * function's result for the step from the current node to the neighbor. */
+float rl_dijkstra_default_score_f(const RL_GraphNode *current, const RL_GraphNode *neighbor, void *context)
+{
+    struct rl_score_context *wrapped = (struct rl_score_context*) context;
+    RL_DistanceFun distance = wrapped->fun;
+
+    return current->score + distance(current->point, neighbor->point);
+}
+
+/**
+ * Scores `graph` from `start` using a plain distance function.
+ *
+ * @param graph      graph to score (see rl_dijkstra_score_ex)
+ * @param start      point that receives a score of 0
+ * @param distance_f distance between two adjacent points; NULL selects
+ *                   rl_distance_simple
+ */
+void rl_dijkstra_score(RL_Graph *graph, RL_Point start, RL_DistanceFun distance_f)
+{
+    /* use the very type rl_dijkstra_default_score_f casts its context to */
+    struct rl_score_context scorer_context;
+    scorer_context.fun = distance_f ? distance_f : rl_distance_simple; /* default to rl_distance_simple */
+    rl_dijkstra_score_ex(graph, start, rl_dijkstra_default_score_f, &scorer_context);
+}
+
+/* Marks every node of the graph as unscored by setting its score to FLT_MAX.
+ * A NULL graph is ignored (after the assertion). */
+void rl_graph_reset(RL_Graph *graph)
+{
+    RL_ASSERT(graph != NULL);
+    if (graph == NULL) return;
+
+    size_t index = 0;
+    while (index < graph->length) {
+        graph->nodes[index].score = FLT_MAX;
+        index++;
+    }
+}
+
+/**
+ * Scores every node reachable from `start`.
+ *
+ * All scores are first reset to FLT_MAX and the node at `start` is set to 0.
+ * Nodes are then expanded in the order given by a heap sorted on score; each
+ * neighbor receives the value of `score_f` whenever that is lower than its
+ * current score, and is queued the first time it is scored.
+ *
+ * If no node has the coordinates of `start`, or the heap cannot be allocated,
+ * the function returns after the reset. A NULL `graph` or `score_f` makes the
+ * function return without touching anything.
+ *
+ * @param score_f       computes the candidate score of `neighbor` when reached
+ *                      from `current`
+ * @param score_context passed through to `score_f`
+ */
+void rl_dijkstra_score_ex(RL_Graph *graph, RL_Point start, RL_ScoreFun score_f, void *score_context)
+{
+    RL_ASSERT(graph);
+    RL_ASSERT(score_f);
+    if (graph == NULL || score_f == NULL) return;
+
+    RL_GraphNode *current = NULL;
+
+    /* reset scores of dijkstra map, setting the start point to 0 */
+    for (size_t i=0; i < graph->length; i++) {
+        RL_GraphNode *node = &graph->nodes[i];
+        if (node->point.x == start.x && node->point.y == start.y) {
+            node->score = 0;
+            current = node;
+        } else {
+            node->score = FLT_MAX;
+        }
+    }
+
+    /* start is not part of the graph: nothing can be scored */
+    if (current == NULL) return;
+
+    RL_Heap *heap = rl_heap_create((int) graph->length, &rl_scored_graph_heap_comparison);
+    RL_ASSERT(heap);
+    if (heap == NULL) return;
+
+    rl_heap_insert(heap, (void*) current);
+    while ((current = (RL_GraphNode*) rl_heap_pop(heap)) != NULL) {
+        for (size_t i=0; i<current->neighbors_length; i++) {
+            RL_GraphNode *neighbor = current->neighbors[i];
+            float distance = score_f(current, neighbor, score_context);
+            if (distance < neighbor->score) {
+                bool first_visit = neighbor->score == FLT_MAX;
+                /* store the score before queueing, so the heap sorts the node by
+                 * its real score instead of the FLT_MAX placeholder */
+                neighbor->score = distance;
+                if (first_visit) {
+                    rl_heap_insert(heap, neighbor);
+                }
+            }
+        }
+    }
+
+    rl_heap_destroy(heap);
+}
+
+/* Frees the graph and its node array; NULL is allowed. */
+void rl_graph_destroy(RL_Graph *graph)
+{
+    if (graph == NULL) {
+        return;
+    }
+    if (graph->nodes != NULL) {
+        RL_FREE(graph->nodes);
+    }
+    RL_FREE(graph);
+}
+
+/* True when the graph has a node at `point` whose score is below FLT_MAX. */
+bool rl_graph_is_scored(const RL_Graph *graph, RL_Point point)
+{
+    RL_GraphNode *found = rl_graph_node(graph, point);
+
+    return found != NULL && found->score < FLT_MAX;
+}
+
+/* Looks up the first node whose coordinates equal `point` by scanning all
+ * nodes. Returns NULL when there is none or `graph` is NULL. */
+RL_GraphNode *rl_graph_node(const RL_Graph *graph, RL_Point point)
+{
+    RL_ASSERT(graph);
+    if (graph == NULL) return NULL;
+
+    RL_GraphNode *found = NULL;
+    for (unsigned int i=0; found == NULL && i<graph->length; ++i) {
+        RL_GraphNode *candidate = &graph->nodes[i];
+        RL_ASSERT(candidate);
+        if (candidate && candidate->point.x == point.x && candidate->point.y == point.y) {
+            found = candidate;
+        }
+    }
+    return found;
+}
+
+/**
+ * Returns the neighbor of `node` with the lowest score (the first one on ties).
+ *
+ * @return the neighbor, or NULL when `node` is NULL, has no neighbors at all, or
+ *         its best neighbor is unscored (score of FLT_MAX)
+ */
+RL_GraphNode *rl_graph_node_lowest_neighbor(const RL_GraphNode *node)
+{
+    RL_ASSERT(node);
+    if (node == NULL) return NULL;
+    RL_GraphNode *lowest_neighbor = NULL;
+    for (size_t i=0; i<node->neighbors_length; i++) {
+        RL_GraphNode *neighbor = node->neighbors[i];
+        if (!lowest_neighbor || neighbor->score < lowest_neighbor->score) {
+            lowest_neighbor = neighbor;
+        }
+    }
+    /* an isolated node has no neighbors, so there may be nothing to inspect */
+    if (lowest_neighbor == NULL || lowest_neighbor->score == FLT_MAX) return NULL;
+    return lowest_neighbor;
+}
+
+/* qsort() comparison for arrays of RL_GraphNode pointers: orders by ascending
+ * score (-1, 0 or 1). */
+int rl_graph_compare_neighbors(const void *a, const void *b)
+{
+    const RL_GraphNode *lhs = *(RL_GraphNode *const *) a;
+    const RL_GraphNode *rhs = *(RL_GraphNode *const *) b;
+
+    if (lhs->score < rhs->score) {
+        return -1;
+    }
+    return lhs->score > rhs->score ? 1 : 0;
+}
+
+/* Sorts the neighbor pointers of `node` in place by ascending score. */
+void rl_graph_node_sort_neighbors(RL_GraphNode *node)
+{
+    qsort(node->neighbors,
+          node->neighbors_length,
+          sizeof(*node->neighbors),
+          rl_graph_compare_neighbors);
+}
+#endif /* RL_ENABLE_PATHFINDING */
+
+#if RL_ENABLE_FOV
+/**
+ * Allocates a field-of-view structure for a map of width x height tiles.
+ *
+ * The structure and its visibility array live in one zero-initialized
+ * allocation, which is why rl_fov_destroy() only frees a single pointer.
+ *
+ * @return the new FOV, or NULL when the allocation fails
+ */
+RL_FOV *rl_fov_create(unsigned int width, unsigned int height)
+{
+    RL_FOV *fov = NULL;
+    unsigned char *block;
+    size_t bytes;
+
+    RL_ASSERT(width > 0 && height > 0);
+    RL_ASSERT(width != UINT_MAX && !(width > UINT_MAX / height)); /* check for overflow */
+
+    /* header followed directly by the visibility array */
+    bytes = sizeof(*fov) + sizeof(*fov->visibility)*width*height;
+    block = (unsigned char*) RL_CALLOC(bytes, 1);
+    RL_ASSERT(block);
+    if (block == NULL) return NULL;
+
+    fov = (RL_FOV*) block;
+    fov->width = width;
+    fov->height = height;
+    fov->visibility = (RL_Byte*) (block + sizeof(*fov));
+    RL_ASSERT(fov);
+    RL_ASSERT(fov->visibility);
+
+    return fov;
+}
+
+/* Frees a FOV created by rl_fov_create(); NULL is allowed. */
+void rl_fov_destroy(RL_FOV *fov)
+{
+    if (fov == NULL) {
+        return;
+    }
+    RL_FREE(fov);
+}
+
+typedef struct { /* slope kept as the integer fraction Y/X, used by the recursive FOV scan */
+    int Y; /* numerator */
+    int X; /* denominator */
+} RL_Slope;
+
+/* adapted from: https://www.adammil.net/blog/v125_Roguelike_Vision_Algorithms.html#shadowcode (public domain) */
+/* also see: https://www.roguebasin.com/index.php/FOV_using_recursive_shadowcasting
+ *
+ * Scans one octant around (origin_x, origin_y) column by column, starting at
+ * column `original_x` and stopping before RL_MAX_RECURSION. Within a column the
+ * tiles between the `top` and `bottom` slopes are visited from top to bottom.
+ * In-range tiles are reported through `mark_visible_f` (with RL_FOV_SYMMETRIC
+ * only when they pass the additional slope test). On a clear-to-opaque
+ * transition the sector above the opaque tile is continued by a recursive call
+ * for the next column; on an opaque-to-clear transition the top slope is lowered.
+ *
+ * `map` is an opaque context handed to the three callbacks, `octant` (0-7)
+ * selects how local (x, y) offsets are translated into map coordinates.
+ *
+ * NOTE(review): tx/ty are floats and go negative near the map edge, while the
+ * callbacks defined in this file take unsigned int coordinates - confirm that
+ * the resulting conversion is intended.
+ */
+void rl_fov_calculate_recursive(void *map, unsigned int origin_x, unsigned int origin_y, RL_IsInRangeFun in_range_f, RL_IsOpaqueFun opaque_f, RL_MarkAsVisibleFun mark_visible_f, unsigned int octant, float original_x, RL_Slope top, RL_Slope bottom)
+{
+    int x;
+    RL_ASSERT(in_range_f);
+    RL_ASSERT(opaque_f);
+    RL_ASSERT(mark_visible_f);
+    for(x = original_x; x < RL_MAX_RECURSION; x++)
+    {
+        /* compute the Y coordinates where the top vector leaves the column (on the right) and where the bottom vector */
+        /* enters the column (on the left). this equals (x+0.5)*top+0.5 and (x-0.5)*bottom+0.5 respectively, which can */
+        /* be computed like (x+0.5)*top+0.5 = (2(x+0.5)*top+1)/2 = ((2x+1)*top+1)/2 to avoid floating point math */
+        /* the rounding is a bit tricky, though */
+        int topY = top.X == 1 ? x : ((x*2+1) * top.Y + top.X - 1) / (top.X*2); /* the rounding is a bit tricky, though */
+        int bottomY = bottom.Y == 0 ? 0 : ((x*2-1) * bottom.Y + bottom.X) / (bottom.X*2);
+        int wasOpaque = -1; /* 0:false, 1:true, -1:not applicable */
+        int y;
+        for(y=topY; y >= bottomY; y--)
+        {
+            float tx = origin_x, ty = origin_y;
+            bool inRange, isOpaque;
+            switch(octant) /* translate local coordinates to map coordinates */
+            {
+                case 0: tx += x; ty -= y; break;
+                case 1: tx += y; ty -= x; break;
+                case 2: tx -= y; ty -= x; break;
+                case 3: tx -= x; ty -= y; break;
+                case 4: tx -= x; ty += y; break;
+                case 5: tx -= y; ty += x; break;
+                case 6: tx += y; ty += x; break;
+                case 7: tx += x; ty += y; break;
+            }
+
+            inRange = in_range_f(tx, ty, map);
+            if(inRange) {
+                if (RL_FOV_SYMMETRIC && (y != topY || top.Y*(int)x >= top.X*y) && (y != bottomY || bottom.Y*(int)x <= bottom.X*y)) {
+                    mark_visible_f(tx, ty, map);
+                } else if (!RL_FOV_SYMMETRIC) {
+                    mark_visible_f(tx, ty, map);
+                }
+            }
+
+            if (x == original_x && !inRange) { /* out of range already in the first column scanned by this call: stop */
+                return;
+            }
+
+            isOpaque = !inRange || opaque_f(tx, ty, map); /* out-of-range tiles are treated as opaque */
+            if(isOpaque)
+            {
+                if(wasOpaque == 0) /* if we found a transition from clear to opaque, this sector is done in this column, so */
+                {                  /* adjust the bottom vector upwards and continue processing it in the next column. */
+                    RL_Slope newBottom;
+                    newBottom.Y = y*2 + 1; /* (x*2-1, y*2+1) is a vector to the top-left of the opaque tile */
+                    newBottom.X = x*2 - 1;
+                    if(!inRange || y == bottomY) { bottom = newBottom; break; } /* don't recurse unless we have to */
+                    else if (inRange) rl_fov_calculate_recursive(map, origin_x, origin_y, in_range_f, opaque_f, mark_visible_f, octant, x+1, top, newBottom);
+                }
+                wasOpaque = 1;
+            }
+            else /* adjust top vector downwards and continue if we found a transition from opaque to clear */
+            {    /* (x*2+1, y*2+1) is the top-right corner of the clear tile (i.e. the bottom-right of the opaque tile) */
+                if(wasOpaque > 0) {
+                    top.Y = y*2 + 1;
+                    top.X = x*2 + 1;
+                }
+                wasOpaque = 0;
+            }
+        }
+
+        if(wasOpaque != 0) break; /* only a column that ended in a clear tile lets the current sector continue; otherwise stop */
+    }
+}
+
+struct RL_FOVMap { /* context handed to the rl_fovmap_* callbacks by rl_fov_calculate */
+    RL_FOV *fov; /* visibility array written by rl_fovmap_mark_visible_f */
+    const RL_Map *map; /* map used for the bounds and opacity checks */
+    unsigned int origin_x; /* viewer position, as passed to rl_fov_calculate */
+    unsigned int origin_y;
+    int fov_radius; /* negative: rl_fovmap_in_range_f reports every tile as in range */
+};
+
+/* RL_MarkAsVisibleFun for struct RL_FOVMap contexts: flags the tile as
+ * RL_TileVisible; coordinates outside of the map are ignored. */
+void rl_fovmap_mark_visible_f(unsigned int x, unsigned int y, void *context)
+{
+    struct RL_FOVMap *state = (struct RL_FOVMap*) context;
+
+    if (!rl_map_in_bounds(state->map, x, y)) {
+        return;
+    }
+    state->fov->visibility[x + y*state->map->width] = RL_TileVisible;
+}
+
+/* RL_IsOpaqueFun for struct RL_FOVMap contexts: forwards to RL_OPAQUE_F on the
+ * wrapped map. */
+bool rl_fovmap_opaque_f(unsigned int x, unsigned int y, void *context)
+{
+    struct RL_FOVMap *state = (struct RL_FOVMap*) context;
+
+    return RL_OPAQUE_F(state->map, x, y);
+}
+
+/* Range callback: reports whether tile (x, y) is within fov_radius of the origin
+ * stored in the RL_FOVMap context. A negative radius means "unlimited". */
+bool rl_fovmap_in_range_f(unsigned int x, unsigned int y, void *context)
+{
+    struct RL_FOVMap *fovmap = (struct RL_FOVMap*) context;
+#if RL_ENABLE_PATHFINDING
+    RL_Point origin, target;
+    if (fovmap->fov_radius < 0) {
+        return true;
+    }
+    origin.x = fovmap->origin_x;
+    origin.y = fovmap->origin_y;
+    target.x = x;
+    target.y = y;
+    return RL_FOV_DISTANCE_F(origin, target) <= (float)fovmap->fov_radius;
+#else
+    /* simplistic manhattan distance */
+    /* NOTE(review): this branch compares with '<' while the branch above uses '<=' - confirm intended */
+    int dx, dy;
+    if (fovmap->fov_radius < 0) {
+        return true;
+    }
+    dx = (int)fovmap->origin_x - (int)x;
+    dy = (int)fovmap->origin_y - (int)y;
+    if (dx < 0) dx = -dx;
+    if (dy < 0) dy = -dy;
+    return dx + dy < fovmap->fov_radius;
+#endif
+}
+
+/* Recalculate the field of view on `map` as seen from (x, y), writing the result to `fov`.
+ * Does nothing when (x, y) is outside of the map. A negative fov_radius disables the range limit. */
+void rl_fov_calculate(RL_FOV *fov, const RL_Map *map, unsigned int x, unsigned int y, int fov_radius)
+{
+    struct RL_FOVMap ctx;
+    unsigned int col, row;
+
+    if (!rl_map_in_bounds(map, x, y)) {
+        return;
+    }
+
+    /* demote tiles that were visible in the previous calculation to "seen" */
+    for (row = 0; row < map->height; ++row) {
+        for (col = 0; col < map->width; ++col) {
+            RL_Byte *tile = &fov->visibility[col + row*map->width];
+            if (*tile == RL_TileVisible) {
+                *tile = RL_TileSeen;
+            }
+        }
+    }
+
+    /* bundle everything the rl_fovmap_* callbacks need and run the generic algorithm */
+    ctx.fov = fov;
+    ctx.map = map;
+    ctx.origin_x = x;
+    ctx.origin_y = y;
+    ctx.fov_radius = fov_radius;
+    rl_fov_calculate_ex(&ctx, x, y, rl_fovmap_in_range_f, rl_fovmap_opaque_f, rl_fovmap_mark_visible_f);
+}
+
+/* Generic FOV calculation: marks the origin (x, y) as visible and then runs the recursive
+ * scan once per octant (0-7), using the supplied callbacks with `context` as user data. */
+void rl_fov_calculate_ex(void *context, unsigned int x, unsigned int y, RL_IsInRangeFun in_range_f, RL_IsOpaqueFun opaque_f, RL_MarkAsVisibleFun mark_visible_f)
+{
+    RL_Slope top = { 1, 1 };
+    RL_Slope bottom = { 0, 1 };
+    int octant = 0;
+
+    mark_visible_f(x, y, context);
+
+    /* every octant starts scanning at the first column next to the origin */
+    while (octant < 8) {
+        rl_fov_calculate_recursive(context, x, y, in_range_f, opaque_f, mark_visible_f, octant, 1, top, bottom);
+        ++octant;
+    }
+}
+
+/* Returns true if tile (x, y) is currently marked RL_TileVisible.
+ * A NULL map or out-of-bounds coordinates yield false. */
+bool rl_fov_is_visible(const RL_FOV *map, unsigned int x, unsigned int y)
+{
+    if (map == NULL || !rl_map_in_bounds((const RL_Map*) map, x, y)) {
+        return false;
+    }
+    return map->visibility[y*map->width + x] == RL_TileVisible;
+}
+
+/* Returns true if tile (x, y) is marked RL_TileSeen.
+ * A NULL map or out-of-bounds coordinates yield false. */
+bool rl_fov_is_seen(const RL_FOV *map, unsigned int x, unsigned int y)
+{
+    if (map == NULL || !rl_map_in_bounds((const RL_Map*) map, x, y)) {
+        return false;
+    }
+    return map->visibility[y*map->width + x] == RL_TileSeen;
+}
+#endif /* if RL_ENABLE_FOV */
+
+#if RL_ENABLE_FILE
+#include <stdio.h>
+
+/* Write `data` to the stdio stream `file` (a FILE* passed as void*).
+ * Layout: int format version (0), the raw RL_Map struct, then width*height tiles.
+ * Returns false as soon as one of the writes comes up short. */
+bool rl_file_save_map(const RL_Map *data, void *file)
+{
+    const int version = 0;
+    FILE *out = (FILE*) file;
+    size_t tile_count;
+
+    RL_ASSERT(data != NULL && file != NULL);
+
+    if (fwrite(&version, sizeof(version), 1, out) != 1) {
+        return false;
+    }
+    if (fwrite(data, sizeof(*data), 1, out) != 1) {
+        return false;
+    }
+
+    tile_count = data->width * data->height;
+    return fwrite(data->tiles, sizeof(*data->tiles), tile_count, out) >= tile_count;
+}
+
+/* Read a map previously written by rl_file_save_map from the stdio stream `file`
+ * (a FILE* passed as void*).
+ *
+ * On success a newly malloc'ed RL_Map (with a malloc'ed tiles buffer) is stored in *data and
+ * true is returned. On any failure (short read, unknown version, invalid dimensions, out of
+ * memory) false is returned, *data is left untouched and nothing is leaked. */
+bool rl_file_load_map(RL_Map **data, void *file)
+{
+    int version;
+    RL_Map dest;
+    RL_Map *loaded;
+    size_t tile_count;
+    FILE *in = (FILE*) file;
+
+    RL_ASSERT(data != NULL && file != NULL);
+    if (fread(&version, sizeof(version), 1, in) < 1) {
+        return false;
+    }
+    if (version != 0) {
+        return false;
+    }
+
+    if (fread(&dest, sizeof(dest), 1, in) < 1) {
+        return false;
+    }
+    /* the struct was stored verbatim, so the tiles pointer read from the file is stale */
+    dest.tiles = NULL;
+    /* a corrupt file must not abort the program: reject it instead of asserting */
+    if (!(dest.width > 0 && dest.height > 0)) {
+        return false;
+    }
+
+    tile_count = dest.width * dest.height;
+    dest.tiles = (RL_Byte*) malloc(sizeof(*dest.tiles) * tile_count);
+    if (dest.tiles == NULL) {
+        return false;
+    }
+    if (fread(dest.tiles, sizeof(*dest.tiles), tile_count, in) < tile_count) {
+        free(dest.tiles); /* do not leak the tile buffer on a short read */
+        return false;
+    }
+
+    loaded = (RL_Map*) malloc(sizeof(dest));
+    if (loaded == NULL) {
+        free(dest.tiles);
+        return false;
+    }
+    memcpy(loaded, &dest, sizeof(dest));
+    *data = loaded;
+
+    return true;
+}
+
+/* Write `data` to the stdio stream `file` (a FILE* passed as void*).
+ * Layout: int format version (0), the raw RL_FOV struct, then width*height visibility entries.
+ * Returns false as soon as one of the writes comes up short. */
+bool rl_file_save_fov(const RL_FOV *data, void *file)
+{
+    const int version = 0;
+    FILE *out = (FILE*) file;
+    size_t tile_count;
+
+    RL_ASSERT(data != NULL && file != NULL);
+
+    if (fwrite(&version, sizeof(version), 1, out) != 1) {
+        return false;
+    }
+    if (fwrite(data, sizeof(*data), 1, out) != 1) {
+        return false;
+    }
+
+    tile_count = data->width * data->height;
+    return fwrite(data->visibility, sizeof(*data->visibility), tile_count, out) >= tile_count;
+}
+
+/* Read a FOV previously written by rl_file_save_fov from the stdio stream `file`
+ * (a FILE* passed as void*).
+ *
+ * On success a newly malloc'ed RL_FOV (with a malloc'ed visibility buffer) is stored in *data
+ * and true is returned. On any failure (short read, unknown version, invalid dimensions, out of
+ * memory) false is returned, *data is left untouched and nothing is leaked. */
+bool rl_file_load_fov(RL_FOV **data, void *file)
+{
+    int version;
+    RL_FOV dest;
+    RL_FOV *loaded;
+    size_t tile_count;
+    FILE *in = (FILE*) file;
+
+    RL_ASSERT(data != NULL && file != NULL);
+    if (fread(&version, sizeof(version), 1, in) < 1) {
+        return false;
+    }
+    if (version != 0) {
+        return false;
+    }
+
+    if (fread(&dest, sizeof(dest), 1, in) < 1) {
+        return false;
+    }
+    /* the struct was stored verbatim, so the visibility pointer read from the file is stale */
+    dest.visibility = NULL;
+    /* a corrupt file must not abort the program: reject it instead of asserting */
+    if (!(dest.width > 0 && dest.height > 0)) {
+        return false;
+    }
+
+    tile_count = dest.width * dest.height;
+    dest.visibility = (RL_Byte*) malloc(sizeof(*dest.visibility) * tile_count);
+    if (dest.visibility == NULL) {
+        return false;
+    }
+    if (fread(dest.visibility, sizeof(*dest.visibility), tile_count, in) < tile_count) {
+        free(dest.visibility); /* do not leak the buffer on a short read */
+        return false;
+    }
+
+    loaded = (RL_FOV*) malloc(sizeof(dest));
+    if (loaded == NULL) {
+        free(dest.visibility);
+        return false;
+    }
+    memcpy(loaded, &dest, sizeof(dest));
+    *data = loaded;
+
+    return true;
+}
+#endif /* if RL_ENABLE_FILE */
+
+#endif /* RL_IMPLEMENTATION */
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/include/extern/rres-raylib.h b/include/extern/rres-raylib.h
new file mode 100644
index 0000000..056aa95
--- /dev/null
+++ b/include/extern/rres-raylib.h
@@ -0,0 +1,1094 @@
+/**********************************************************************************************
+*
+*   rres-raylib v1.2 - rres loaders specific for raylib data structures
+*
+*   CONFIGURATION:
+*
+*   #define RRES_RAYLIB_IMPLEMENTATION
+*       Generates the implementation of the library into the included file.
+*       If not defined, the library is in header only mode and can be included in other headers
+*       or source files without problems. But only ONE file should hold the implementation.
+*
+*   #define RRES_SUPPORT_COMPRESSION_LZ4
+*       Support data compression algorithm LZ4, provided by lz4.h/lz4.c library
+*
+*   #define RRES_SUPPORT_ENCRYPTION_AES
+*       Support data encryption algorithm AES, provided by aes.h/aes.c library
+*
+*   #define RRES_SUPPORT_ENCRYPTION_XCHACHA20
+*       Support data encryption algorithm XChaCha20-Poly1305,
+*       provided by monocypher.h/monocypher.c library
+*
+*   DEPENDENCIES:
+*
+*     - raylib.h: Data types definition and data loading from memory functions
+*                 WARNING: raylib.h MUST be included before including rres-raylib.h
+*     - rres.h:   Base implementation of rres specs, required to read rres files and resource chunks
+*     - lz4.h:    LZ4 compression support (optional)
+*     - aes.h:    AES-256 CTR encryption support (optional)
+*     - monocypher.h: for XChaCha20-Poly1305 encryption support (optional) 
+*
+*   VERSION HISTORY:
+*
+*     - 1.2 (15-Apr-2023): Updated to monocypher 4.0.1
+*     - 1.0 (11-May-2022): Initial implementation release
+*
+*
+*   LICENSE: MIT
+*
+*   Copyright (c) 2020-2023 Ramon Santamaria (@raysan5)
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a copy
+*   of this software and associated documentation files (the "Software"), to deal
+*   in the Software without restriction, including without limitation the rights
+*   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+*   copies of the Software, and to permit persons to whom the Software is
+*   furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included in all
+*   copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+*   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+**********************************************************************************************/
+
+#ifndef RRES_RAYLIB_H
+#define RRES_RAYLIB_H
+
+#ifndef RRES_H
+    #include "rres.h"
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Global variables
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+#if defined(__cplusplus)
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+// rres data loading to raylib data structures
+// NOTE: Chunk data must be provided uncompressed/unencrypted
+RLAPI void *LoadDataFromResource(rresResourceChunk chunk, unsigned int *size); // Load raw data from rres resource chunk
+RLAPI char *LoadTextFromResource(rresResourceChunk chunk);      // Load text data from rres resource chunk
+RLAPI Image LoadImageFromResource(rresResourceChunk chunk);     // Load Image data from rres resource chunk
+RLAPI Wave LoadWaveFromResource(rresResourceChunk chunk);       // Load Wave data from rres resource chunk
+RLAPI Font LoadFontFromResource(rresResourceMulti multi);       // Load Font data from rres resource multiple chunks
+RLAPI Mesh LoadMeshFromResource(rresResourceMulti multi);       // Load Mesh data from rres resource multiple chunks
+
+// Unpack resource chunk data (decompress/decrypt data)
+// NOTE: Function returns 0 on success or other value on failure
+RLAPI int UnpackResourceChunk(rresResourceChunk *chunk);        // Unpack resource chunk data (decompress/decrypt)
+                                                            
+// Set base directory for externally linked data
+// NOTE: When resource chunk contains an external link (FourCC: LINK, Type: RRES_DATA_LINK),
+// a base directory is required to be prepended to link path
+// If not provided, the application path is prepended to link by default 
+RLAPI void SetBaseDirectory(const char *baseDir);               // Set base directory for externally linked data
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // RRES_RAYLIB_H
+
+/***********************************************************************************
+*
+*   RRES RAYLIB IMPLEMENTATION
+*
+************************************************************************************/
+
+#if defined(RRES_RAYLIB_IMPLEMENTATION)
+
+// Compression/Encryption algorithms supported
+// NOTE: They should be the same supported by the rres packaging tool (rrespacker)
+// https://github.com/phoboslab/qoi
+#include "external/qoi.h"                   // Compression algorithm: QOI (implementation in raylib)
+
+#if defined(RRES_SUPPORT_COMPRESSION_LZ4)
+    // https://github.com/lz4/lz4
+    #include "external/lz4.h"               // Compression algorithm: LZ4
+    #include "external/lz4.c"               // Compression algorithm implementation: LZ4
+#endif
+#if defined(RRES_SUPPORT_ENCRYPTION_AES)
+    // https://github.com/kokke/tiny-AES-c
+    #include "external/aes.h"               // Encryption algorithm: AES
+    #include "external/aes.c"               // Encryption algorithm implementation: AES
+#endif
+#if defined(RRES_SUPPORT_ENCRYPTION_XCHACHA20)
+    // https://github.com/LoupVaillant/Monocypher
+    #include "external/monocypher.h"        // Encryption algorithm: XChaCha20-Poly1305
+    #include "external/monocypher.c"        // Encryption algorithm implementation: XChaCha20-Poly1305
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+static const char *baseDir = NULL;      // Base directory pointer, used on external linked data loading
+
+//----------------------------------------------------------------------------------
+// Module specific Functions Declaration
+//----------------------------------------------------------------------------------
+
+// Load simple data chunks that are later required by multi-chunk resources
+// NOTE: Chunk data must be provided uncompressed/unencrypted
+static void *LoadDataFromResourceLink(rresResourceChunk chunk, unsigned int *size);      // Load chunk: RRES_DATA_LINK
+static void *LoadDataFromResourceChunk(rresResourceChunk chunk, unsigned int *size);     // Load chunk: RRES_DATA_RAW
+static char *LoadTextFromResourceChunk(rresResourceChunk chunk, unsigned int *codeLang); // Load chunk: RRES_DATA_TEXT
+static Image LoadImageFromResourceChunk(rresResourceChunk chunk);                        // Load chunk: RRES_DATA_IMAGE
+
+static const char *GetExtensionFromProps(unsigned int ext01, unsigned int ext02);        // Get file extension from RRES_DATA_RAW properties (unsigned int) 
+static unsigned int *ComputeMD5(unsigned char *data, int size);                          // Compute MD5 hash code, returns 4 integers array (static)
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+
+// Load raw data from rres resource chunk
+// Returns the loaded data and stores its length in *size for RRES_DATA_RAW and RRES_DATA_LINK chunks,
+// any other chunk type returns NULL and leaves *size untouched
+void *LoadDataFromResource(rresResourceChunk chunk, unsigned int *size)
+{
+    // Raw data embedded in the resource chunk
+    if (rresGetDataType(chunk.info.type) == RRES_DATA_RAW) return LoadDataFromResourceChunk(chunk, size);
+
+    // Data stored in an externally linked file
+    if (rresGetDataType(chunk.info.type) == RRES_DATA_LINK)
+    {
+        unsigned int linkedSize = 0;
+        void *linkedData = LoadDataFromResourceLink(chunk, &linkedSize);
+
+        *size = linkedSize;
+        return linkedData;
+    }
+
+    return NULL;
+}
+
+// Load text data from rres resource
+// NOTE: Text must be NULL terminated
+// Supports RRES_DATA_TEXT, RRES_DATA_RAW and RRES_DATA_LINK chunks, other types return NULL
+char *LoadTextFromResource(rresResourceChunk chunk)
+{
+    char *text = NULL;
+    unsigned int codeLang = 0;      // Must match the (unsigned int *) parameter of LoadTextFromResourceChunk()
+
+    if (rresGetDataType(chunk.info.type) == RRES_DATA_TEXT)       // Text data
+    {
+        text = LoadTextFromResourceChunk(chunk, &codeLang);
+
+        // TODO: Consider text code language to load shader or code scripts
+    }
+    else if (rresGetDataType(chunk.info.type) == RRES_DATA_RAW)   // Raw text file
+    {
+        unsigned int size = 0;
+        text = LoadDataFromResourceChunk(chunk, &size);
+    }
+    else if (rresGetDataType(chunk.info.type) == RRES_DATA_LINK)  // Link to external file
+    {
+        // Get raw data from external linked file
+        unsigned int dataSize = 0;
+        void *data = LoadDataFromResourceLink(chunk, &dataSize);
+        text = data;
+    }
+
+    return text;
+}
+
+// Load Image data from rres resource
+// Supports RRES_DATA_IMAGE, RRES_DATA_RAW (image file) and RRES_DATA_LINK (external image file) chunks,
+// other chunk types return a zero-initialized Image
+Image LoadImageFromResource(rresResourceChunk chunk)
+{
+    Image image = { 0 };
+
+    if (rresGetDataType(chunk.info.type) == RRES_DATA_IMAGE)          // Image data
+    {
+        image = LoadImageFromResourceChunk(chunk);
+    }
+    else if (rresGetDataType(chunk.info.type) == RRES_DATA_RAW)       // Raw image file
+    {
+        unsigned int dataSize = 0;
+        unsigned char *data = LoadDataFromResourceChunk(chunk, &dataSize);
+
+        image = LoadImageFromMemory(GetExtensionFromProps(chunk.data.props[1], chunk.data.props[2]), data, dataSize);
+
+        RL_FREE(data);
+    }
+    else if (rresGetDataType(chunk.info.type) == RRES_DATA_LINK)      // Link to external file
+    {
+        // Get raw data from external linked file
+        unsigned int dataSize = 0;
+        void *data = LoadDataFromResourceLink(chunk, &dataSize);
+
+        // Load image from linked file data
+        // NOTE: Function checks internally if the file extension is supported to
+        // properly load the data, if it fails it logs the result and image.data = NULL
+        image = LoadImageFromMemory(GetFileExtension(chunk.data.raw), data, dataSize);
+
+        // The file buffer is no longer needed once decoded, release it like the raw-file branch does
+        // NOTE(review): assumes LoadDataFromResourceLink() allocates with the raylib allocator - confirm
+        RL_FREE(data);
+    }
+
+    return image;
+}
+
+// Load Wave data from rres resource
+// Supports RRES_DATA_WAVE, RRES_DATA_RAW (wave file) and RRES_DATA_LINK (external wave file) chunks,
+// other chunk types return a zero-initialized Wave
+Wave LoadWaveFromResource(rresResourceChunk chunk)
+{
+    Wave wave = { 0 };
+
+    if (rresGetDataType(chunk.info.type) == RRES_DATA_WAVE)       // Wave data
+    {
+        if ((chunk.info.compType == RRES_COMP_NONE) && (chunk.info.cipherType == RRES_CIPHER_NONE))
+        {
+            wave.frameCount = chunk.data.props[0];
+            wave.sampleRate = chunk.data.props[1];
+            wave.sampleSize = chunk.data.props[2];
+            wave.channels = chunk.data.props[3];
+
+            // NOTE(review): size does not take wave.channels into account - confirm against the rres WAVE chunk layout
+            unsigned int size = wave.frameCount*wave.sampleSize/8;
+            wave.data = RL_CALLOC(size, 1);
+            if (wave.data != NULL) memcpy(wave.data, chunk.data.raw, size);
+        }
+        // Only warn when the chunk is actually still compressed/encrypted
+        else RRES_LOG("RRES: %c%c%c%c: WARNING: Data must be decompressed/decrypted\n", chunk.info.type[0], chunk.info.type[1], chunk.info.type[2], chunk.info.type[3]);
+    }
+    else if (rresGetDataType(chunk.info.type) == RRES_DATA_RAW)   // Raw wave file
+    {
+        unsigned int dataSize = 0;
+        unsigned char *data = LoadDataFromResourceChunk(chunk, &dataSize);
+
+        wave = LoadWaveFromMemory(GetExtensionFromProps(chunk.data.props[1], chunk.data.props[2]), data, dataSize);
+
+        RL_FREE(data);
+    }
+    else if (rresGetDataType(chunk.info.type) == RRES_DATA_LINK)  // Link to external file
+    {
+        // Get raw data from external linked file
+        unsigned int dataSize = 0;
+        void *data = LoadDataFromResourceLink(chunk, &dataSize);
+
+        // Load wave from linked file data
+        // NOTE: Function checks internally if the file extension is supported to
+        // properly load the data, if it fails it logs the result and wave.data = NULL
+        wave = LoadWaveFromMemory(GetFileExtension(chunk.data.raw), data, dataSize);
+
+        // The file buffer is no longer needed once decoded, release it like the raw-file branch does
+        // NOTE(review): assumes LoadDataFromResourceLink() allocates with the raylib allocator - confirm
+        RL_FREE(data);
+    }
+
+    return wave;
+}
+
+// Load Font data from rres resource
+// Accepts either (2+) chunks [RRES_DATA_FONT_GLYPHS + RRES_DATA_IMAGE] or a single
+// RRES_DATA_RAW/RRES_DATA_LINK chunk containing a font file (loaded at 32px base size)
+// Returns a zero-initialized Font if no data could be loaded (including multi.count == 0)
+Font LoadFontFromResource(rresResourceMulti multi)
+{
+    Font font = { 0 };
+
+    // Font resource consist of (2) chunks:
+    //  - RRES_DATA_FONT_GLYPHS: Basic font and glyphs properties/data
+    //  - RRES_DATA_IMAGE: Image atlas for the font characters
+    if (multi.count >= 2)
+    {
+        if (rresGetDataType(multi.chunks[0].info.type) == RRES_DATA_FONT_GLYPHS)
+        {
+            if ((multi.chunks[0].info.compType == RRES_COMP_NONE) && (multi.chunks[0].info.cipherType == RRES_CIPHER_NONE))
+            {
+                rresFontGlyphInfo *glyphsInfo = (rresFontGlyphInfo *)multi.chunks[0].data.raw;   // Font glyphs info comes as a data blob
+
+                // Load font basic properties from chunk[0]
+                font.baseSize = multi.chunks[0].data.props[0];           // Base size (default chars height)
+                font.glyphCount = multi.chunks[0].data.props[1];         // Number of characters (glyphs)
+                font.glyphPadding = multi.chunks[0].data.props[2];       // Padding around the chars
+
+                if (font.glyphCount < 0) font.glyphCount = 0;            // Guard against an out-of-range unsigned property
+
+                font.recs = (Rectangle *)RL_CALLOC(font.glyphCount, sizeof(Rectangle));
+                font.glyphs = (GlyphInfo *)RL_CALLOC(font.glyphCount, sizeof(GlyphInfo));
+
+                if ((font.recs != NULL) && (font.glyphs != NULL))
+                {
+                    for (int i = 0; i < font.glyphCount; i++)
+                    {
+                        font.recs[i].x = (float)glyphsInfo[i].x;
+                        font.recs[i].y = (float)glyphsInfo[i].y;
+                        font.recs[i].width = (float)glyphsInfo[i].width;
+                        font.recs[i].height = (float)glyphsInfo[i].height;
+
+                        font.glyphs[i].value = glyphsInfo[i].value;
+                        font.glyphs[i].offsetX = glyphsInfo[i].offsetX;
+                        font.glyphs[i].offsetY = glyphsInfo[i].offsetY;
+                        font.glyphs[i].advanceX = glyphsInfo[i].advanceX;
+
+                        // NOTE: font.glyphs[i].image is not loaded
+                    }
+                }
+                else
+                {
+                    // Allocation failed: do not return a font with a glyph count but no glyph data
+                    RL_FREE(font.recs);
+                    RL_FREE(font.glyphs);
+                    font.recs = NULL;
+                    font.glyphs = NULL;
+                    font.glyphCount = 0;
+                }
+            }
+            // NOTE: FourCC type is 4 chars and not NULL terminated, it can not be logged with %s
+            else RRES_LOG("RRES: %c%c%c%c: WARNING: Data must be decompressed/decrypted\n", multi.chunks[0].info.type[0], multi.chunks[0].info.type[1], multi.chunks[0].info.type[2], multi.chunks[0].info.type[3]);
+        }
+
+        // Load font image chunk
+        if (rresGetDataType(multi.chunks[1].info.type) == RRES_DATA_IMAGE)
+        {
+            // The image chunk itself (chunk[1]) must be unpacked, not only the glyphs chunk
+            if ((multi.chunks[1].info.compType == RRES_COMP_NONE) && (multi.chunks[1].info.cipherType == RRES_CIPHER_NONE))
+            {
+                Image image = LoadImageFromResourceChunk(multi.chunks[1]);
+                font.texture = LoadTextureFromImage(image);
+                UnloadImage(image);
+            }
+            else RRES_LOG("RRES: %c%c%c%c: WARNING: Data must be decompressed/decrypted\n", multi.chunks[1].info.type[0], multi.chunks[1].info.type[1], multi.chunks[1].info.type[2], multi.chunks[1].info.type[3]);
+        }
+    }
+    else if (multi.count == 1)    // One chunk of data: RRES_DATA_RAW or RRES_DATA_LINK?
+    {
+        if (rresGetDataType(multi.chunks[0].info.type) == RRES_DATA_RAW)      // Raw font file
+        {
+            unsigned int dataSize = 0;
+            unsigned char *rawData = LoadDataFromResourceChunk(multi.chunks[0], &dataSize);
+
+            font = LoadFontFromMemory(GetExtensionFromProps(multi.chunks[0].data.props[1], multi.chunks[0].data.props[2]), rawData, dataSize, 32, NULL, 0);
+
+            RL_FREE(rawData);
+        }
+        else if (rresGetDataType(multi.chunks[0].info.type) == RRES_DATA_LINK)     // Link to external font file
+        {
+            // Get raw data from external linked file
+            unsigned int dataSize = 0;
+            void *rawData = LoadDataFromResourceLink(multi.chunks[0], &dataSize);
+
+            // Load font from linked file data
+            // NOTE 1: Loading font at 32px base size and default charset (95 glyphs)
+            // NOTE 2: Function checks internally if the file extension is supported to
+            // properly load the data, if it fails it logs the result and font.texture.id = 0
+            font = LoadFontFromMemory(GetFileExtension(multi.chunks[0].data.raw), rawData, dataSize, 32, NULL, 0);
+
+            RRES_FREE(rawData);
+        }
+    }
+
+    return font;
+}
+
+// Duplicate a vertex attribute array stored in a resource chunk
+// Returns a newly allocated copy of (count*elementSize) bytes or NULL if the allocation fails
+static void *CopyVertexDataFromChunk(const void *raw, unsigned int count, unsigned int elementSize)
+{
+    void *copy = RL_CALLOC(count, elementSize);
+
+    if (copy != NULL) memcpy(copy, raw, (size_t)count*elementSize);
+
+    return copy;
+}
+
+// Load Mesh data from rres resource
+// NOTE: We try to load vertex data following raylib structure constraints,
+// in case data does not fit raylib Mesh structure, it is not loaded
+// Chunks that are still compressed/encrypted are skipped with a warning
+Mesh LoadMeshFromResource(rresResourceMulti multi)
+{
+    Mesh mesh = { 0 };
+
+    // TODO: Support externally linked mesh resource?
+
+    // Mesh resource consist of (n) chunks:
+    for (unsigned int i = 0; i < multi.count; i++)
+    {
+        // Chunk properties/data are only usable once unpacked: that is required for
+        // the reference chunk (0) and for the chunk being processed (i)
+        bool refUnpacked = (multi.chunks[0].info.compType == RRES_COMP_NONE) && (multi.chunks[0].info.cipherType == RRES_CIPHER_NONE);
+        bool chunkUnpacked = (multi.chunks[i].info.compType == RRES_COMP_NONE) && (multi.chunks[i].info.cipherType == RRES_CIPHER_NONE);
+
+        if (!refUnpacked || !chunkUnpacked)
+        {
+            RRES_LOG("RRES: WARNING: Vertex provided data must be decompressed/decrypted\n");
+            continue;
+        }
+
+        // NOTE: raylib only supports vertex arrays with same vertex count,
+        // rres.chunks[0] defined vertexCount will be the reference for the following chunks
+        // The only exception to vertexCount is the mesh.indices array
+        if (mesh.vertexCount == 0) mesh.vertexCount = multi.chunks[0].data.props[0];
+
+        // Verify chunk type
+        if (rresGetDataType(multi.chunks[i].info.type) != RRES_DATA_VERTEX) continue;
+
+        unsigned int count = multi.chunks[i].data.props[0];         // Elements count
+        unsigned int attribute = multi.chunks[i].data.props[1];     // rresVertexAttribute value
+        unsigned int components = multi.chunks[i].data.props[2];    // Components per element
+        unsigned int format = multi.chunks[i].data.props[3];        // rresVertexFormat value
+        const void *raw = multi.chunks[i].data.raw;
+
+        // In case vertex count do not match we skip that resource chunk
+        if ((attribute != RRES_VERTEX_ATTRIBUTE_INDEX) && (count != mesh.vertexCount)) continue;
+
+        // NOTE: We are only loading raylib supported rresVertexFormat and raylib expected components count
+        switch (attribute)
+        {
+            case RRES_VERTEX_ATTRIBUTE_POSITION:
+            {
+                // raylib expects 3 components per vertex and float vertex format
+                if ((components == 3) && (format == RRES_VERTEX_FORMAT_FLOAT)) mesh.vertices = (float *)CopyVertexDataFromChunk(raw, mesh.vertexCount*3, sizeof(float));
+                else RRES_LOG("RRES: WARNING: MESH: Vertex attribute position not valid, componentCount/vertexFormat do not fit\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_TEXCOORD1:
+            {
+                // raylib expects 2 components per vertex and float vertex format
+                if ((components == 2) && (format == RRES_VERTEX_FORMAT_FLOAT)) mesh.texcoords = (float *)CopyVertexDataFromChunk(raw, mesh.vertexCount*2, sizeof(float));
+                else RRES_LOG("RRES: WARNING: MESH: Vertex attribute texcoord1 not valid, componentCount/vertexFormat do not fit\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_TEXCOORD2:
+            {
+                // raylib expects 2 components per vertex and float vertex format
+                if ((components == 2) && (format == RRES_VERTEX_FORMAT_FLOAT)) mesh.texcoords2 = (float *)CopyVertexDataFromChunk(raw, mesh.vertexCount*2, sizeof(float));
+                else RRES_LOG("RRES: WARNING: MESH: Vertex attribute texcoord2 not valid, componentCount/vertexFormat do not fit\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_TEXCOORD3:
+            {
+                RRES_LOG("RRES: WARNING: MESH: Vertex attribute texcoord3 not supported\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_TEXCOORD4:
+            {
+                RRES_LOG("RRES: WARNING: MESH: Vertex attribute texcoord4 not supported\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_NORMAL:
+            {
+                // raylib expects 3 components per vertex and float vertex format
+                if ((components == 3) && (format == RRES_VERTEX_FORMAT_FLOAT)) mesh.normals = (float *)CopyVertexDataFromChunk(raw, mesh.vertexCount*3, sizeof(float));
+                else RRES_LOG("RRES: WARNING: MESH: Vertex attribute normal not valid, componentCount/vertexFormat do not fit\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_TANGENT:
+            {
+                // raylib expects 4 components per vertex and float vertex format
+                if ((components == 4) && (format == RRES_VERTEX_FORMAT_FLOAT)) mesh.tangents = (float *)CopyVertexDataFromChunk(raw, mesh.vertexCount*4, sizeof(float));
+                else RRES_LOG("RRES: WARNING: MESH: Vertex attribute tangent not valid, componentCount/vertexFormat do not fit\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_COLOR:
+            {
+                // raylib expects 4 components per vertex and unsigned char vertex format
+                if ((components == 4) && (format == RRES_VERTEX_FORMAT_UBYTE)) mesh.colors = (unsigned char *)CopyVertexDataFromChunk(raw, mesh.vertexCount*4, sizeof(unsigned char));
+                else RRES_LOG("RRES: WARNING: MESH: Vertex attribute color not valid, componentCount/vertexFormat do not fit\n");
+
+            } break;
+            case RRES_VERTEX_ATTRIBUTE_INDEX:
+            {
+                // raylib expects 1 components per index and unsigned short vertex format
+                // NOTE: Indices use the chunk own elements count, not mesh.vertexCount
+                if ((components == 1) && (format == RRES_VERTEX_FORMAT_USHORT)) mesh.indices = (unsigned short *)CopyVertexDataFromChunk(raw, count, sizeof(unsigned short));
+                else RRES_LOG("RRES: WARNING: MESH: Vertex attribute index not valid, componentCount/vertexFormat do not fit\n");
+
+            } break;
+            default: break;
+        }
+    }
+
+    return mesh;
+}
+
+// Unpack compressed/encrypted data from resource chunk: data is decrypted first and decompressed afterwards
+// In case data could not be processed by rres.h, it is just copied in chunk.data.raw for processing here
+// NOTE 1: Function returns 0 on success or an error code on failure (codes 1 to 4, listed in the function body)
+// NOTE 2: Data corruption CRC32 check has already been performed by rresLoadResourceMulti() on rres.h
+int UnpackResourceChunk(rresResourceChunk *chunk)
+{
+    int result = 0;
+    bool updateProps = false;
+
+    // Result error codes:
+    //  0 - No error, decompression/decryption successful
+    //  1 - Encryption algorithm not supported
+    //  2 - Invalid password on decryption
+    //  3 - Compression algorithm not supported
+    //  4 - Error on data decompression
+
+    // NOTE 1: If data is compressed/encrypted the properties are not loaded by rres.h because
+    // it's up to the user to process the data; *chunk must be properly updated by this function
+    // NOTE 2: rres-raylib should support the same algorithms and libraries used by rrespacker tool
+    void *unpackedData = NULL;
+
+    // STEP 1. Data decryption
+    //-------------------------------------------------------------------------------------
+    unsigned char *decryptedData = NULL;
+
+    switch (chunk->info.cipherType)
+    {
+        case RRES_CIPHER_NONE: decryptedData = chunk->data.raw; break;
+#if defined(RRES_SUPPORT_ENCRYPTION_AES)
+        case RRES_CIPHER_AES:
+        {
+            // WARNING: Implementation dependant!
+            // rrespacker tool appends (salt[16] + MD5[16]) to encrypted data for convenience,
+            // Actually, chunk->info.packedSize considers those additional elements
+
+            // Get some memory for the possible message output
+            decryptedData = (unsigned char *)RL_CALLOC(chunk->info.packedSize - 16 - 16, 1);
+            if (decryptedData != NULL) memcpy(decryptedData, chunk->data.raw, chunk->info.packedSize - 16 - 16);
+
+            // Required variables for key stretching
+            uint8_t key[32] = { 0 };                    // Encryption key
+            uint8_t salt[16] = { 0 };                   // Key stretching salt
+
+            // Retrieve salt from chunk packed data
+            // salt is stored at the end of packed data, before MD5: salt[16] + MD5[16]
+            memcpy(salt, ((unsigned char *)chunk->data.raw) + (chunk->info.packedSize - 16 - 16), 16);
+
+            // Key stretching configuration
+            crypto_argon2_config config = {
+                .algorithm = CRYPTO_ARGON2_I,           // Algorithm: Argon2i
+                .nb_blocks = 16384,                     // Blocks: 16 MB
+                .nb_passes = 3,                         // Iterations
+                .nb_lanes  = 1                          // Single-threaded
+            };
+            crypto_argon2_inputs inputs = {
+                .pass = (const uint8_t *)rresGetCipherPassword(),     // User password
+                .pass_size = 16,                        // Password length
+                .salt = salt,                           // Salt for the password
+                .salt_size = 16
+            };
+            crypto_argon2_extras extras = { 0 };        // Extra parameters unused
+
+            void *workArea = RL_MALLOC(config.nb_blocks*1024);    // Key stretching work area
+
+            // Generate strong encryption key, generated from user password using Argon2i algorithm (256 bit)
+            crypto_argon2(key, 32, workArea, config, inputs, extras);
+
+            // Wipe key generation secrets, they are no longer needed
+            crypto_wipe(salt, 16);
+            RL_FREE(workArea);
+
+            // Required variables for decryption and message authentication
+            unsigned int md5[4] = { 0 };                // Message Authentication Code generated on encryption
+
+            // Retrieve MD5 from chunk packed data
+            // NOTE: MD5 is stored at the end of packed data, after salt: salt[16] + MD5[16]
+            memcpy(md5, ((unsigned char *)chunk->data.raw) + (chunk->info.packedSize - 16), 4*sizeof(unsigned int));
+
+            // Message decryption, requires key
+            struct AES_ctx ctx = { 0 };
+            AES_init_ctx(&ctx, key);
+            AES_CTR_xcrypt_buffer(&ctx, (uint8_t *)decryptedData, chunk->info.packedSize - 16 - 16);   // AES Counter mode, stream cipher
+
+            // Verify MD5 to check if data decryption worked
+            unsigned int decryptMD5[4] = { 0 };
+            unsigned int *md5Ptr = ComputeMD5(decryptedData, chunk->info.packedSize - 16 - 16);
+            for (int i = 0; i < 4; i++) decryptMD5[i] = md5Ptr[i];
+
+            // Wipe secrets if they are no longer needed
+            crypto_wipe(key, 32);
+
+            if (memcmp(decryptMD5, md5, 4*sizeof(unsigned int)) == 0)    // Decrypted successfully!
+            {
+                chunk->info.packedSize -= (16 + 16);    // We remove additional data size from packed size (salt[16] + MD5[16])
+                RRES_LOG("RRES: %c%c%c%c: Data decrypted successfully (AES)\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+            }
+            else
+            {
+                result = 2;    // Data was not decrypted as expected, wrong password or message corrupted
+                RRES_LOG("RRES: WARNING: %c%c%c%c: Data decryption failed, wrong password or corrupted data\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+            }
+        } break;
+#endif
+#if defined(RRES_SUPPORT_ENCRYPTION_XCHACHA20)
+        case RRES_CIPHER_XCHACHA20_POLY1305:
+        {
+            // WARNING: Implementation dependant!
+            // rrespacker tool appends (salt[16] + nonce[24] + MAC[16]) to encrypted data for convenience,
+            // Actually, chunk->info.packedSize considers those additional elements
+
+            // Get some memory for the possible message output
+            decryptedData = (unsigned char *)RL_CALLOC(chunk->info.packedSize - 16 - 24 - 16, 1);
+
+            // Required variables for key stretching
+            uint8_t key[32] = { 0 };                    // Encryption key
+            uint8_t salt[16] = { 0 };                   // Key stretching salt
+
+            // Retrieve salt from chunk packed data
+            // salt is stored at the end of packed data, before nonce and MAC: salt[16] + nonce[24] + MAC[16]
+            memcpy(salt, ((unsigned char *)chunk->data.raw) + (chunk->info.packedSize - 16 - 24 - 16), 16);
+
+            // Key stretching configuration
+            crypto_argon2_config config = {
+                .algorithm = CRYPTO_ARGON2_I,           // Algorithm: Argon2i
+                .nb_blocks = 16384,                     // Blocks: 16 MB
+                .nb_passes = 3,                         // Iterations
+                .nb_lanes  = 1                          // Single-threaded
+            };
+            crypto_argon2_inputs inputs = {
+                .pass = (const uint8_t *)rresGetCipherPassword(),     // User password
+                .pass_size = 16,                        // Password length
+                .salt = salt,                           // Salt for the password
+                .salt_size = 16
+            };
+            crypto_argon2_extras extras = { 0 };        // Extra parameters unused
+
+            void *workArea = RL_MALLOC(config.nb_blocks*1024);    // Key stretching work area
+
+            // Generate strong encryption key, generated from user password using Argon2i algorithm (256 bit)
+            crypto_argon2(key, 32, workArea, config, inputs, extras);
+
+            // Wipe key generation secrets, they are no longer needed
+            crypto_wipe(salt, 16);
+            RL_FREE(workArea);
+
+            // Required variables for decryption and message authentication
+            uint8_t nonce[24] = { 0 };                  // nonce used on encryption, unique to processed file
+            uint8_t mac[16] = { 0 };                    // Message Authentication Code generated on encryption
+
+            // Retrieve nonce and MAC from chunk packed data
+            // nonce and MAC are stored at the end of packed data, after salt: salt[16] + nonce[24] + MAC[16]
+            memcpy(nonce, ((unsigned char *)chunk->data.raw) + (chunk->info.packedSize - 16 - 24), 24);
+            memcpy(mac, ((unsigned char *)chunk->data.raw) + (chunk->info.packedSize - 16), 16);
+
+            // Message decryption requires key, nonce and MAC
+            int decryptResult = crypto_aead_unlock(decryptedData, mac, key, nonce, NULL, 0, chunk->data.raw, (chunk->info.packedSize - 16 - 24 - 16));
+
+            // Wipe secrets if they are no longer needed
+            crypto_wipe(nonce, 24);
+            crypto_wipe(key, 32);
+
+            if (decryptResult == 0)    // Decrypted successfully!
+            {
+                chunk->info.packedSize -= (16 + 24 + 16);    // We remove additional data size from packed size
+                RRES_LOG("RRES: %c%c%c%c: Data decrypted successfully (XChaCha20)\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+            }
+            else    // Any non-zero result is handled as a failure, data must never be accepted as decrypted in that case
+            {
+                result = 2;   // Wrong password or message corrupted
+                RRES_LOG("RRES: WARNING: %c%c%c%c: Data decryption failed, wrong password or corrupted data\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+            }
+        } break;
+#endif
+        default:
+        {
+            result = 1;    // Decryption algorithm not supported
+            RRES_LOG("RRES: WARNING: %c%c%c%c: Chunk data encryption algorithm not supported\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+        } break;
+    }
+
+    if ((result == 0) && (chunk->info.cipherType != RRES_CIPHER_NONE))
+    {
+        // Data is not encrypted any more, register it
+        chunk->info.cipherType = RRES_CIPHER_NONE;
+        updateProps = true;
+    }
+
+    // STEP 2: Data decompression (if decryption was successful)
+    //-------------------------------------------------------------------------------------
+    unsigned char *uncompData = NULL;
+
+    if (result == 0)
+    {
+        switch (chunk->info.compType)
+        {
+            case RRES_COMP_NONE: unpackedData = decryptedData; break;
+            case RRES_COMP_DEFLATE:
+            {
+                int uncompDataSize = 0;
+
+                // TODO: WARNING: Possible issue with allocators: RL_CALLOC() vs RRES_CALLOC()
+                uncompData = DecompressData(decryptedData, chunk->info.packedSize, &uncompDataSize);
+
+                if ((uncompData != NULL) && (uncompDataSize > 0))     // Decompression successful
+                {
+                    unpackedData = uncompData;
+                    chunk->info.packedSize = uncompDataSize;
+                    RRES_LOG("RRES: %c%c%c%c: Data decompressed successfully (DEFLATE)\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+                }
+                else
+                {
+                    result = 4;    // Decompression process failed
+                    RRES_LOG("RRES: WARNING: %c%c%c%c: Chunk data decompression failed\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+                }
+
+                // Security check, uncompDataSize must match the provided chunk->baseSize
+                if (uncompDataSize != chunk->info.baseSize) RRES_LOG("RRES: WARNING: Decompressed data could be corrupted, unexpected size\n");
+            } break;
+#if defined(RRES_SUPPORT_COMPRESSION_LZ4)
+            case RRES_COMP_LZ4:
+            {
+                int uncompDataSize = 0;
+                uncompData = (unsigned char *)RRES_CALLOC(chunk->info.baseSize, 1);
+                if (uncompData != NULL) uncompDataSize = LZ4_decompress_safe(decryptedData, uncompData, chunk->info.packedSize, chunk->info.baseSize);
+
+                if ((uncompData != NULL) && (uncompDataSize > 0))     // Decompression successful
+                {
+                    unpackedData = uncompData;
+                    chunk->info.packedSize = uncompDataSize;
+                    RRES_LOG("RRES: %c%c%c%c: Data decompressed successfully (LZ4)\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+                }
+                else
+                {
+                    result = 4;    // Decompression process failed
+                    RRES_FREE(uncompData); uncompData = NULL;    // Output buffer is useless on failure, do not leak it
+                    RRES_LOG("RRES: WARNING: %c%c%c%c: Chunk data decompression failed\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+                }
+                // WARNING: Decompression could be successful but not the original message size returned
+                if (uncompDataSize != chunk->info.baseSize) RRES_LOG("RRES: WARNING: Decompressed data could be corrupted, unexpected size\n");
+            } break;
+#endif
+            case RRES_COMP_QOI:
+            {
+                int uncompDataSize = 0;
+                qoi_desc desc = { 0 };
+
+                // TODO: WARNING: Possible issue with allocators: QOI_MALLOC() vs RRES_MALLOC()
+                uncompData = qoi_decode(decryptedData, chunk->info.packedSize, &desc, 0);
+                uncompDataSize = (desc.width*desc.height*desc.channels) + 20;   // Add the 20 bytes of (propCount + props[4])
+
+                if ((uncompData != NULL) && (uncompDataSize > 0))     // Decompression successful
+                {
+                    unpackedData = uncompData;
+                    chunk->info.packedSize = uncompDataSize;
+                    RRES_LOG("RRES: %c%c%c%c: Data decompressed successfully (QOI)\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+                }
+                else
+                {
+                    result = 4;    // Decompression process failed
+                    RRES_LOG("RRES: WARNING: %c%c%c%c: Chunk data decompression failed\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+                }
+
+                if (uncompDataSize != chunk->info.baseSize) RRES_LOG("RRES: WARNING: Decompressed data could be corrupted, unexpected size\n");
+            } break;
+            default:
+            {
+                result = 3;
+                RRES_LOG("RRES: WARNING: %c%c%c%c: Chunk data compression algorithm not supported\n", chunk->info.type[0], chunk->info.type[1], chunk->info.type[2], chunk->info.type[3]);
+            } break;
+        }
+    }
+
+    if ((result == 0) && (chunk->info.compType != RRES_COMP_NONE))
+    {
+        // Data is not compressed any more, register it
+        chunk->info.compType = RRES_COMP_NONE;
+        updateProps = true;
+    }
+
+    // Release the temporary decrypted buffer unless it is chunk->data.raw itself or it became the final unpacked data
+    if ((decryptedData != NULL) && (decryptedData != chunk->data.raw) && (decryptedData != unpackedData)) RL_FREE(decryptedData);
+
+    // Update chunk->data.propCount and chunk->data.props if required
+    if (updateProps && (unpackedData != NULL))
+    {
+        // data.propCount and data.props[] are still empty, they must be filled from the unpacked data (that contains everything)
+        chunk->data.propCount = ((int *)unpackedData)[0];
+
+        if (chunk->data.propCount > 0)
+        {
+            chunk->data.props = (unsigned int *)RRES_CALLOC(chunk->data.propCount, sizeof(int));
+            for (unsigned int i = 0; i < chunk->data.propCount; i++) chunk->data.props[i] = ((int *)unpackedData)[1 + i];
+        }
+
+        // Move chunk->data.raw pointer (chunk->data.propCount*sizeof(int)) positions
+        void *raw = RRES_CALLOC(chunk->info.baseSize - 20, 1);
+        if (raw != NULL) memcpy(raw, ((unsigned char *)unpackedData) + 20, chunk->info.baseSize - 20);
+        RRES_FREE(chunk->data.raw);
+        chunk->data.raw = raw;
+        RL_FREE(unpackedData);
+    }
+
+    return result;
+}
+
+//----------------------------------------------------------------------------------
+// Module specific Functions Definition
+//----------------------------------------------------------------------------------
+
+// Load data chunk: RRES_DATA_LINK
+static void *LoadDataFromResourceLink(rresResourceChunk chunk, unsigned int *size)
+{
+    char fullFilePath[2048] = { 0 };
+    void *data = NULL;
+    *size = 0;
+
+    // Get external link filepath, the extra zeroed byte guarantees a NULL terminated string
+    char *linkFilePath = (char *)RL_CALLOC(chunk.data.props[0] + 1, 1);
+    if (linkFilePath == NULL) return data;
+    memcpy(linkFilePath, chunk.data.raw, chunk.data.props[0]);
+
+    // Get base directory to append filepath if not provided by user
+    if (baseDir == NULL) baseDir = GetApplicationDirectory();
+
+    // Compose full path only if it fits the buffer, otherwise it is left empty (presumably reported as not found below)
+    if ((strlen(baseDir) + strlen(linkFilePath)) < sizeof(fullFilePath)) { strcpy(fullFilePath, baseDir); strcat(fullFilePath, linkFilePath); }
+
+    RRES_LOG("RRES: %c%c%c%c: Data file linked externally: %s\n", chunk.info.type[0], chunk.info.type[1], chunk.info.type[2], chunk.info.type[3], linkFilePath);
+
+    if (FileExists(fullFilePath))
+    {
+        // Load external file as raw data, text files are detected by extension to allow automatic line-endings processing
+        if (IsFileExtension(linkFilePath, ".txt;.md;.vs;.fs;.info;.c;.h;.json;.xml;.glsl"))     // Text file
+        {
+            data = LoadFileText(fullFilePath);
+            *size = TextLength(data);
+        }
+        else data = LoadFileData(fullFilePath, size);
+        if ((data != NULL) && (*size > 0)) RRES_LOG("RRES: %c%c%c%c: External linked file loaded successfully\n", chunk.info.type[0], chunk.info.type[1], chunk.info.type[2], chunk.info.type[3]);
+    }
+    else RRES_LOG("RRES: WARNING: [%s] Linked external file could not be found\n", linkFilePath);
+
+    RL_FREE(linkFilePath);    // Path copy is only needed inside this function, do not leak it
+    return data;
+}
+
+// Load data chunk: RRES_DATA_RAW
+// NOTE: This chunk can be used for raw files embedding or other binary blobs
+static void *LoadDataFromResourceChunk(rresResourceChunk chunk, unsigned int *size)
+{
+    void *rawData = NULL;
+    *size = 0;    // Size is only reported when data gets actually loaded
+
+    if ((chunk.info.compType == RRES_COMP_NONE) && (chunk.info.cipherType == RRES_CIPHER_NONE))
+    {
+        rawData = RL_CALLOC(chunk.data.props[0], 1);
+        if (rawData != NULL) { memcpy(rawData, chunk.data.raw, chunk.data.props[0]); *size = chunk.data.props[0]; }
+    }
+    else RRES_LOG("RRES: %c%c%c%c: WARNING: Data must be decompressed/decrypted\n", chunk.info.type[0], chunk.info.type[1], chunk.info.type[2], chunk.info.type[3]);
+
+    return rawData;
+}
+
+// Load data chunk: RRES_DATA_TEXT
+// NOTE: This chunk can be used for shaders or other text data elements (materials?)
+static char *LoadTextFromResourceChunk(rresResourceChunk chunk, unsigned int *codeLang)
+{
+    // Packed chunks can not be read directly, they must be unpacked first
+    if ((chunk.info.compType != RRES_COMP_NONE) || (chunk.info.cipherType != RRES_CIPHER_NONE))
+    {
+        RRES_LOG("RRES: %c%c%c%c: WARNING: Data must be decompressed/decrypted\n", chunk.info.type[0], chunk.info.type[1], chunk.info.type[2], chunk.info.type[3]);
+        return NULL;
+    }
+
+    // props[0] is used as the amount of bytes to copy, one extra zeroed byte works as NULL terminator, just in case
+    unsigned int textSize = chunk.data.props[0];
+    char *textCopy = (char *)RL_CALLOC(textSize + 1, 1);
+    if (textCopy != NULL) memcpy(textCopy, chunk.data.raw, textSize);
+
+    // TODO: Extra text properties available: props[1]:rresTextEncoding, props[2]:rresCodeLang, props[3]:cultureCode
+    *codeLang = chunk.data.props[2];
+    return textCopy;
+}
+
+// Load data chunk: RRES_DATA_IMAGE
+// NOTE: Many data types use images data in some way (font, material...)
+static Image LoadImageFromResourceChunk(rresResourceChunk chunk)
+{
+    Image image = { 0 };
+
+    // Packed chunks can not be read directly, an empty image is returned for them
+    if ((chunk.info.compType != RRES_COMP_NONE) || (chunk.info.cipherType != RRES_CIPHER_NONE))
+    {
+        RRES_LOG("RRES: %c%c%c%c: WARNING: Data must be decompressed/decrypted\n", chunk.info.type[0], chunk.info.type[1], chunk.info.type[2], chunk.info.type[3]);
+        return image;
+    }
+
+    image.width = chunk.data.props[0];
+    image.height = chunk.data.props[1];
+    image.mipmaps = chunk.data.props[3];
+
+    // Translate rresPixelFormat (props[2]) into raylib PixelFormat, unknown values leave image.format untouched
+    switch ((int)chunk.data.props[2])
+    {
+        case RRES_PIXELFORMAT_UNCOMP_GRAYSCALE: image.format = PIXELFORMAT_UNCOMPRESSED_GRAYSCALE; break;
+        case RRES_PIXELFORMAT_UNCOMP_GRAY_ALPHA: image.format = PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA; break;
+        case RRES_PIXELFORMAT_UNCOMP_R5G6B5: image.format = PIXELFORMAT_UNCOMPRESSED_R5G6B5; break;
+        case RRES_PIXELFORMAT_UNCOMP_R8G8B8: image.format = PIXELFORMAT_UNCOMPRESSED_R8G8B8; break;
+        case RRES_PIXELFORMAT_UNCOMP_R5G5B5A1: image.format = PIXELFORMAT_UNCOMPRESSED_R5G5B5A1; break;
+        case RRES_PIXELFORMAT_UNCOMP_R4G4B4A4: image.format = PIXELFORMAT_UNCOMPRESSED_R4G4B4A4; break;
+        case RRES_PIXELFORMAT_UNCOMP_R8G8B8A8: image.format = PIXELFORMAT_UNCOMPRESSED_R8G8B8A8; break;
+        case RRES_PIXELFORMAT_UNCOMP_R32: image.format = PIXELFORMAT_UNCOMPRESSED_R32; break;
+        case RRES_PIXELFORMAT_UNCOMP_R32G32B32: image.format = PIXELFORMAT_UNCOMPRESSED_R32G32B32; break;
+        case RRES_PIXELFORMAT_UNCOMP_R32G32B32A32: image.format = PIXELFORMAT_UNCOMPRESSED_R32G32B32A32; break;
+        case RRES_PIXELFORMAT_COMP_DXT1_RGB: image.format = PIXELFORMAT_COMPRESSED_DXT1_RGB; break;
+        case RRES_PIXELFORMAT_COMP_DXT1_RGBA: image.format = PIXELFORMAT_COMPRESSED_DXT1_RGBA; break;
+        case RRES_PIXELFORMAT_COMP_DXT3_RGBA: image.format = PIXELFORMAT_COMPRESSED_DXT3_RGBA; break;
+        case RRES_PIXELFORMAT_COMP_DXT5_RGBA: image.format = PIXELFORMAT_COMPRESSED_DXT5_RGBA; break;
+        case RRES_PIXELFORMAT_COMP_ETC1_RGB: image.format = PIXELFORMAT_COMPRESSED_ETC1_RGB; break;
+        case RRES_PIXELFORMAT_COMP_ETC2_RGB: image.format = PIXELFORMAT_COMPRESSED_ETC2_RGB; break;
+        case RRES_PIXELFORMAT_COMP_ETC2_EAC_RGBA: image.format = PIXELFORMAT_COMPRESSED_ETC2_EAC_RGBA; break;
+        case RRES_PIXELFORMAT_COMP_PVRT_RGB: image.format = PIXELFORMAT_COMPRESSED_PVRT_RGB; break;
+        case RRES_PIXELFORMAT_COMP_PVRT_RGBA: image.format = PIXELFORMAT_COMPRESSED_PVRT_RGBA; break;
+        case RRES_PIXELFORMAT_COMP_ASTC_4x4_RGBA: image.format = PIXELFORMAT_COMPRESSED_ASTC_4x4_RGBA; break;
+        case RRES_PIXELFORMAT_COMP_ASTC_8x8_RGBA: image.format = PIXELFORMAT_COMPRESSED_ASTC_8x8_RGBA; break;
+        default: break;
+    }
+
+    // Pixel data size is computed from image properties, it must match the chunk data size
+    // once the 20 bytes of (propCount + props[4]) are discounted
+    unsigned int pixelBytes = GetPixelDataSize(image.width, image.height, image.format);
+
+    if (pixelBytes != (chunk.info.baseSize - 20)) RRES_LOG("RRES: WARNING: IMGE: Chunk data size do not match expected image data size\n");
+    else
+    {
+        image.data = RL_CALLOC(pixelBytes, 1);
+        if (image.data != NULL) memcpy(image.data, chunk.data.raw, pixelBytes);
+    }
+
+    return image;
+}
+
+// Get file extension from RRES_DATA_RAW properties (unsigned int)
+static const char *GetExtensionFromProps(unsigned int ext01, unsigned int ext02)
+{
+    static char extension[9] = { 0 };    // 8 extension bytes + 1 byte that always stays 0 as NULL terminator
+    memset(extension, 0, sizeof(extension));
+
+    // Convert file extension provided as 2 unsigned int properties, to a char[] array
+    // NOTE: Extension is defined as 2 unsigned int big-endian values (4 bytes each), starting with a dot, i.e 0x2e706e67 => ".png"
+    // WARNING: Returned pointer refers to a static buffer, it is overwritten by the next call
+    extension[0] = (unsigned char)((ext01 & 0xff000000) >> 24);
+    extension[1] = (unsigned char)((ext01 & 0x00ff0000) >> 16);
+    extension[2] = (unsigned char)((ext01 & 0x0000ff00) >> 8);
+    extension[3] = (unsigned char)(ext01 & 0x000000ff);
+
+    extension[4] = (unsigned char)((ext02 & 0xff000000) >> 24);
+    extension[5] = (unsigned char)((ext02 & 0x00ff0000) >> 16);
+    extension[6] = (unsigned char)((ext02 & 0x0000ff00) >> 8);
+    extension[7] = (unsigned char)(ext02 & 0x000000ff);
+
+    return extension;
+}
+
+// Compute MD5 hash code, returns 4 integers array (static, overwritten by every call, all zeros on allocation failure)
+static unsigned int *ComputeMD5(unsigned char *data, int size)
+{
+#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
+
+    static unsigned int hash[4] = { 0 };
+
+    // NOTE: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
+
+    // r specifies the per-round shift amounts (constant table, not rebuilt on every call)
+    static const unsigned int r[] = {
+        7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
+        5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20,
+        4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
+        6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21
+    };
+
+    // k uses the binary integer part of the sines of integers (in radians) as constants
+    static const unsigned int k[] = {
+        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+    };
+
+    hash[0] = 0x67452301;
+    hash[1] = 0xefcdab89;
+    hash[2] = 0x98badcfe;
+    hash[3] = 0x10325476;
+
+    // Pre-processing: adding a single 1 bit
+    // Append '1' bit to message
+    // NOTE: The input bytes are considered as bits strings,
+    // where the first bit is the most significant bit of the byte
+    // Pre-processing: padding with zeros
+    // Append '0' bit until message length in bit 448 (mod 512)
+    // Append length mod (2 pow 64) to message
+
+    int newDataSize = ((((size + 8)/64) + 1)*64) - 8;
+
+    unsigned char *msg = RL_CALLOC(newDataSize + 64, 1);   // Also appends "0" bits (we alloc also 64 extra bytes...)
+    if (msg == NULL) { memset(hash, 0, sizeof(hash)); return hash; }    // Out of memory: no hash can be computed
+    memcpy(msg, data, size);
+    msg[size] = 128;                 // Write the "1" bit
+
+    unsigned long long bitsLen = 8ULL*(unsigned int)size;    // 64 bit length, 8*size could overflow a 32 bit value
+    for (int i = 0; i < 8; i++) msg[newDataSize + i] = (unsigned char)(bitsLen >> (8*i));  // We append the len in bits at the end of the buffer, low byte first
+
+    // Process the message in successive 512-bit chunks for each 512-bit chunk of message
+    for (int offset = 0; offset < newDataSize; offset += (512/8))
+    {
+        // Break chunk into sixteen 32-bit words w[j], 0 <= j <= 15
+        unsigned int *w = (unsigned int *)(msg + offset);
+
+        // Initialize hash value for this chunk
+        unsigned int a = hash[0];
+        unsigned int b = hash[1];
+        unsigned int c = hash[2];
+        unsigned int d = hash[3];
+
+        for (int i = 0; i < 64; i++)
+        {
+            unsigned int f, g;
+
+            if (i < 16)
+            {
+                f = (b & c) | ((~b) & d);
+                g = i;
+            }
+            else if (i < 32)
+            {
+                f = (d & b) | ((~d) & c);
+                g = (5*i + 1)%16;
+            }
+            else if (i < 48)
+            {
+                f = b ^ c ^ d;
+                g = (3*i + 5)%16;
+            }
+            else
+            {
+                f = c ^ (b | (~d));
+                g = (7*i)%16;
+            }
+
+            unsigned int temp = d;
+            d = c;
+            c = b;
+            b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
+            a = temp;
+        }
+
+        // Add chunk's hash to result so far
+        hash[0] += a;
+        hash[1] += b;
+        hash[2] += c;
+        hash[3] += d;
+    }
+
+    RL_FREE(msg);
+
+    return hash;
+}
+
+#endif // RRES_RAYLIB_IMPLEMENTATION
diff --git a/include/extern/rres.h b/include/extern/rres.h
new file mode 100644
index 0000000..eac16c4
--- /dev/null
+++ b/include/extern/rres.h
@@ -0,0 +1,1091 @@
+/**********************************************************************************************
+*
+*   rres v1.0 - A simple and easy-to-use file-format to package resources
+*
+*   CONFIGURATION:
+*
+*   #define RRES_IMPLEMENTATION
+*       Generates the implementation of the library into the included file.
+*       If not defined, the library is in header only mode and can be included in other headers
+*       or source files without problems. But only ONE file should hold the implementation.
+*
+*   FEATURES:
+* 
+*     - Multi-resource files: Some files could end-up generating multiple connected resources in
+*       the rres output file (i.e TTF files could generate RRES_DATA_FONT_GLYPHS and RRES_DATA_IMAGE).
+*     - File packaging as raw resource data: Avoid data processing and just package the file bytes.
+*     - Per-file data compression/encryption: Configure compression/encryption for every input file.
+*     - Externally linked files: Package only the file path, to be loaded from external file when the
+*       specific id is requested. WARNING: Be careful with path, it should be relative to application dir.
+*     - Central Directory resource (optional): Create a central directory with the input filename relation
+*       to the resource(s) id. This is the default option but it can be avoided; in that case, a header
+*       file (.h) is generated with the file ids definitions.
+*
+*   FILE STRUCTURE:
+*
+*   rres files consist of a file header followed by a number of resource chunks.
+*
+*   Optionally it can contain a Central Directory resource chunk (usually at the end) with the info
+*   of all the files processed into the rres file.
+*
+*   NOTE: Chunks count may not match files count, some processed files (i.e Font, Mesh)
+*   could generate multiple chunks with the same id related by the rresResourceChunkInfo.nextOffset
+*   Those chunks are loaded together when resource is loaded
+*
+*   rresFileHeader               (16 bytes)
+*       Signature Id              (4 bytes)     // File signature id: 'rres'
+*       Version                   (2 bytes)     // Format version
+*       Resource Count            (2 bytes)     // Number of resource chunks contained
+*       CD Offset                 (4 bytes)     // Central Directory offset (if available)
+*       Reserved                  (4 bytes)     // <reserved>
+*
+*   rresResourceChunk[]
+*   {
+*       rresResourceChunkInfo   (32 bytes)
+*           Type                  (4 bytes)     // Resource type (FourCC)
+*           Id                    (4 bytes)     // Resource identifier (CRC32 filename hash or custom)
+*           Compressor            (1 byte)      // Data compression algorithm
+*           Cipher                (1 byte)      // Data encryption algorithm
+*           Flags                 (2 bytes)     // Data flags (if required)
+*           Data Packed Size      (4 bytes)     // Data packed size (compressed/encrypted + custom data appended)
+*           Data Base Size        (4 bytes)     // Data base size (uncompressed/unencrypted)
+*           Next Offset           (4 bytes)     // Next resource chunk offset (if required)
+*           Reserved              (4 bytes)     // <reserved>
+*           CRC32                 (4 bytes)     // Resource Data Chunk CRC32
+*
+*       rresResourceChunkData     (n bytes)     // Packed data
+*           Property Count        (4 bytes)     // Number of properties contained
+*           Properties[]          (4*i bytes)   // Resource data required properties, depend on Type
+*           Data                  (m bytes)     // Resource data
+*   }
+*
+*   rresResourceChunk: RRES_DATA_DIRECTORY      // Central directory (special resource chunk)
+*   {
+*       rresResourceChunkInfo   (32 bytes)
+*
+*       rresCentralDir            (n bytes)     // rresResourceChunkData
+*           Entries Count         (4 bytes)     // Central directory entries count (files)
+*           rresDirEntry[]
+*           {
+*               Id                (4 bytes)     // Resource id
+*               Offset            (4 bytes)     // Resource global offset in file
+*               reserved          (4 bytes)     // <reserved>
+*               FileName Size     (4 bytes)     // Resource fileName size (NULL terminator and 4-bytes align padding considered)
+*               FileName          (m bytes)     // Resource original fileName (NULL terminated and padded to 4-byte alignment)
+*           }
+*    }
+*
+*   DESIGN DECISIONS / LIMITATIONS:
+*
+*     - rres file maximum chunks: 65535 (16bit chunk count in rresFileHeader)
+*     - rres file maximum size: 4GB (chunk offset and Central Directory Offset are 32bit, so they can not address more than 4GB)
+*     - Chunk search by ID is done one by one, starting at first chunk and accessed with fread() function
+*     - Endianness: rres does not care about endianness, data is stored as desired by the host platform (most probably Little Endian)
+*       Endianness won't affect chunk data but it will affect rresFileHeader and rresResourceChunkInfo
+*     - CRC32 hash is used to generate the rres file identifier from filename
+*       There is a "small" probability of random collision (1 in 2^32 approx.) but considering
+*       the chance of collision is related to the number of data inputs, not the size of the inputs, we assume that risk
+*       Also note that CRC32 is not used as a security/cryptographic hash, just an identifier for the input file
+*     - CRC32 hash is also used to detect chunk data corruption. CRC32 is smaller and computationally much less complex than MD5 or SHA1.
+*       Using a hash function like MD5 is probably overkill for random error detection
+*     - Central Directory rresDirEntry.fileName is NULL terminated and padded to 4-byte, rresDirEntry.fileNameSize considers the padding
+*     - Compression and Encryption. rres supports chunks data compression and encryption, it provides two fields in the rresResourceChunkInfo to
+*       note it, but in those cases it is up to the user to implement the desired compressor/uncompressor and encryption/decryption mechanisms
+*       In case of data encryption, it's recommended that any additional resource data (i.e. MAC) to be appended to data chunk and properly
+*       noted in the packed data size field of rresResourceChunkInfo. Data compression should be applied before encryption.
+*
+*   DEPENDENCIES:
+*
+*   rres library dependencies have been kept to the minimum. It depends only on some libc functionality:
+*
+*     - stdlib.h: Required for memory allocation: malloc(), calloc(), free()
+*                 NOTE: Allocators can be redefined with macros RRES_MALLOC, RRES_CALLOC, RRES_FREE
+*     - stdio.h:  Required for file access functionality: FILE, fopen(), fseek(), fread(), fclose()
+*     - string.h: Required for memory data management: memcpy(), memcmp()
+*
+*   VERSION HISTORY:
+*
+*     - 1.0 (12-May-2022): Implementation review for better alignment with rres specs
+*     - 0.9 (28-Apr-2022): Initial implementation of rres specs
+*
+*
+*   LICENSE: MIT
+*
+*   Copyright (c) 2016-2022 Ramon Santamaria (@raysan5)
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a copy
+*   of this software and associated documentation files (the "Software"), to deal
+*   in the Software without restriction, including without limitation the rights
+*   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+*   copies of the Software, and to permit persons to whom the Software is
+*   furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included in all
+*   copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+*   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+**********************************************************************************************/
+
+#ifndef RRES_H
+#define RRES_H
+
+// Function specifiers in case library is built/used as a shared library (Windows)
+// NOTE: Microsoft specifiers to tell compiler that symbols are imported/exported from a .dll
+#if defined(_WIN32)
+    #if defined(BUILD_LIBTYPE_SHARED)
+        #define RRESAPI __declspec(dllexport)     // We are building the library as a Win32 shared library (.dll)
+    #elif defined(USE_LIBTYPE_SHARED)
+        #define RRESAPI __declspec(dllimport)     // We are using the library as a Win32 shared library (.dll)
+    #endif
+#endif
+
+// Function specifiers definition
+#ifndef RRESAPI
+    #define RRESAPI       // Functions defined as 'extern' by default (implicit specifiers)
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+
+// Allow custom memory allocators
+#ifndef RRES_MALLOC
+    #define RRES_MALLOC(sz)         malloc(sz)
+#endif
+#ifndef RRES_CALLOC
+    #define RRES_CALLOC(ptr,sz)     calloc(ptr,sz)     // NOTE: 'ptr' is forwarded as calloc()'s first argument (element count), 'sz' as the element size
+#endif
+#ifndef RRES_REALLOC
+    #define RRES_REALLOC(ptr,sz)    realloc(ptr,sz)
+#endif
+#ifndef RRES_FREE
+    #define RRES_FREE(ptr)          free(ptr)
+#endif
+
+// Simple log system to avoid printf() calls if required
+// NOTE: Avoiding those calls also avoids const strings memory usage
+#define RRES_SUPPORT_LOG_INFO               // Defined unconditionally here: comment this line out to compile RRES_LOG() away
+#if defined(RRES_SUPPORT_LOG_INFO)
+    #define RRES_LOG(...) printf(__VA_ARGS__)   // Forwards all arguments to printf()
+#else
+    #define RRES_LOG(...)                       // Expands to nothing, log calls are removed
+#endif
+
+#define RRES_MAX_FILENAME_SIZE      1024
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+// rres file header (16 bytes)
+typedef struct rresFileHeader {
+    unsigned char id[4];            // File identifier: rres
+    unsigned short version;         // File version: 100 for version 1.0
+    unsigned short chunkCount;      // Number of resource chunks in the file (MAX: 65535)
+    unsigned int cdOffset;          // Central Directory offset in file (0 if not available)
+    unsigned int reserved;          // <reserved>
+} rresFileHeader;
+
+// rres resource chunk info header (32 bytes)
+typedef struct rresResourceChunkInfo {
+    unsigned char type[4];          // Resource chunk type (FourCC)
+    unsigned int id;                // Resource chunk identifier (generated from filename CRC32 hash)
+    unsigned char compType;         // Data compression algorithm
+    unsigned char cipherType;       // Data encryption algorithm
+    unsigned short flags;           // Data flags (if required)
+    unsigned int packedSize;        // Data chunk size (compressed/encrypted + custom data appended)
+    unsigned int baseSize;          // Data base size (uncompressed/unencrypted)
+    unsigned int nextOffset;        // Next resource chunk global offset (if resource has multiple chunks)
+    unsigned int reserved;          // <reserved>
+    unsigned int crc32;             // Data chunk CRC32 (propCount + props[] + data)
+} rresResourceChunkInfo;
+
+// rres resource chunk data
+typedef struct rresResourceChunkData {
+    unsigned int propCount;         // Resource chunk properties count
+    unsigned int *props;            // Resource chunk properties
+    void *raw;                      // Resource chunk raw data
+} rresResourceChunkData;
+
+// rres resource chunk
+typedef struct rresResourceChunk {
+    rresResourceChunkInfo info;     // Resource chunk info
+    rresResourceChunkData data;     // Resource chunk packed data, contains propCount, props[] and raw data
+} rresResourceChunk;
+
+// rres resource multi
+// NOTE: It supports multiple resource chunks
+typedef struct rresResourceMulti {
+    unsigned int count;             // Resource chunks count
+    rresResourceChunk *chunks;      // Resource chunks
+} rresResourceMulti;
+
+// Useful data types for specific chunk types
+//----------------------------------------------------------------------
+// CDIR: rres central directory entry
+typedef struct rresDirEntry {
+    unsigned int id;                // Resource id
+    unsigned int offset;            // Resource global offset in file
+    unsigned int reserved;          // reserved
+    unsigned int fileNameSize;      // Resource fileName size (NULL terminator and 4-byte alignment padding considered)
+    char fileName[RRES_MAX_FILENAME_SIZE];  // Resource original fileName (NULL terminated and padded to 4-byte alignment)
+} rresDirEntry;
+
+// CDIR: rres central directory
+// NOTE: This data conforms the rresResourceChunkData
+typedef struct rresCentralDir {
+    unsigned int count;             // Central directory entries count
+    rresDirEntry *entries;          // Central directory entries
+} rresCentralDir;
+
+// FNTG: rres font glyphs info (32 bytes)
+// NOTE: An array of this type conforms the rresResourceChunkData
+typedef struct rresFontGlyphInfo {
+    int x, y, width, height;        // Glyph rectangle in the atlas image
+    int value;                      // Glyph codepoint value
+    int offsetX, offsetY;           // Glyph drawing offset (from base line)
+    int advanceX;                   // Glyph advance X for next character
+} rresFontGlyphInfo;
+
+//----------------------------------------------------------------------------------
+// Enums Definition
+// The following enums are useful to fill some fields of the rresResourceChunkInfo
+// and also some fields of the different data types properties
+//----------------------------------------------------------------------------------
+
+// rres resource chunk data type
+// NOTE 1: Data type determines the properties and the data included in every chunk
+// NOTE 2: This enum defines the basic resource data types,
+// some input files could generate multiple resource chunks:
+//   Fonts processed could generate (2) resource chunks:
+//   - [FNTG] rres[0]: RRES_DATA_FONT_GLYPHS
+//   - [IMGE] rres[1]: RRES_DATA_IMAGE
+//
+//   Mesh processed could generate (n) resource chunks:
+//   - [VRTX] rres[0]: RRES_DATA_VERTEX
+//   ...
+//   - [VRTX] rres[n]: RRES_DATA_VERTEX
+typedef enum rresResourceDataType {
+    RRES_DATA_NULL         = 0,             // FourCC: NULL - Reserved for empty chunks, no props/data
+    RRES_DATA_RAW          = 1,             // FourCC: RAWD - Raw file data, 4 properties
+                                            //    props[0]:size (bytes)
+                                            //    props[1]:extension01 (big-endian: ".png" = 0x2e706e67)
+                                            //    props[2]:extension02 (additional part, extensions with +3 letters)
+                                            //    props[3]:reserved
+                                            //    data: raw bytes
+    RRES_DATA_TEXT         = 2,             // FourCC: TEXT - Text file data, 4 properties
+                                            //    props[0]:size (bytes)
+                                            //    props[1]:rresTextEncoding
+                                            //    props[2]:rresCodeLang
+                                            //    props[3]:cultureCode
+                                            //    data: text
+    RRES_DATA_IMAGE        = 3,             // FourCC: IMGE - Image file data, 4 properties
+                                            //    props[0]:width
+                                            //    props[1]:height
+                                            //    props[2]:rresPixelFormat
+                                            //    props[3]:mipmaps
+                                            //    data: pixels
+    RRES_DATA_WAVE         = 4,             // FourCC: WAVE - Audio file data, 4 properties
+                                            //    props[0]:frameCount
+                                            //    props[1]:sampleRate
+                                            //    props[2]:sampleSize
+                                            //    props[3]:channels
+                                            //    data: samples
+    RRES_DATA_VERTEX       = 5,             // FourCC: VRTX - Vertex file data, 4 properties
+                                            //    props[0]:vertexCount
+                                            //    props[1]:rresVertexAttribute
+                                            //    props[2]:componentCount
+                                            //    props[3]:rresVertexFormat
+                                            //    data: vertex
+    RRES_DATA_FONT_GLYPHS  = 6,             // FourCC: FNTG - Font glyphs info data, 4 properties
+                                            //    props[0]:baseSize
+                                            //    props[1]:glyphCount
+                                            //    props[2]:glyphPadding
+                                            //    props[3]:rresFontStyle
+                                            //    data: rresFontGlyphInfo[0..glyphCount]
+    RRES_DATA_LINK         = 99,            // FourCC: LINK - External linked file, 1 property
+                                            //    props[0]:size (bytes)
+                                            //    data: filepath (as provided on input)
+    RRES_DATA_DIRECTORY    = 100,           // FourCC: CDIR - Central directory for input files
+                                            //    props[0]:entryCount, 1 property
+                                            //    data: rresDirEntry[0..entryCount]
+
+    // TODO: 2.0: Support resource package types (multi-resource)
+    // NOTE: They contain multiple rresResourceChunk in rresResourceChunkData.raw
+    //RRES_DATA_PACK_FONT    = 110,         // FourCC: PFNT - Resources Pack: Font data, 1 property (2 resource chunks: RRES_DATA_FONT_GLYPHS, RRES_DATA_IMAGE)
+                                            //    props[0]:chunkCount
+    //RRES_DATA_PACK_MESH    = 120,         // FourCC: PMSH - Resources Pack: Mesh data, 1 property (n resource chunks: RRES_DATA_VERTEX)
+                                            //    props[0]:chunkCount
+
+    // TODO: Add additional resource data types if required (define props + data)
+
+} rresResourceDataType;
+
+// Compression algorithms
+// Value required by rresResourceChunkInfo.compType
+// NOTE 1: This enum just lists some common data compression algorithms for convenience,
+// the rres packer tool and the engine-specific library are responsible for implementing the desired ones
+// NOTE 2: rresResourceChunkInfo.compType is a byte-size value, limited to [0..255]
+typedef enum rresCompressionType {
+    RRES_COMP_NONE          = 0,            // No data compression
+    RRES_COMP_RLE           = 1,            // RLE compression
+    RRES_COMP_DEFLATE       = 10,           // DEFLATE compression
+    RRES_COMP_LZ4           = 20,           // LZ4 compression
+    RRES_COMP_LZMA2         = 30,           // LZMA2 compression
+    RRES_COMP_QOI           = 40,           // QOI compression, useful for RGB(A) image data
+    // TODO: Add additional compression algorithms if required
+} rresCompressionType;
+
+// Encryption algorithms
+// Value required by rresResourceChunkInfo.cipherType
+// NOTE 1: This enum just lists some common data encryption algorithms for convenience,
+// the rres packer tool and the engine-specific library are responsible for implementing the desired ones
+// NOTE 2: Some encryption algorithms could require/generate additional data (seed, salt, nonce, MAC...)
+// in those cases, that extra data must be appended to the original encrypted message and added to the resource data chunk
+// NOTE 3: rresResourceChunkInfo.cipherType is a byte-size value, limited to [0..255]
+typedef enum rresEncryptionType {
+    RRES_CIPHER_NONE        = 0,            // No data encryption
+    RRES_CIPHER_XOR         = 1,            // XOR encryption, generic using 128bit key in blocks
+    RRES_CIPHER_DES         = 10,           // DES encryption
+    RRES_CIPHER_TDES        = 11,           // Triple DES encryption
+    RRES_CIPHER_IDEA        = 20,           // IDEA encryption
+    RRES_CIPHER_AES         = 30,           // AES (128bit or 256bit) encryption
+    RRES_CIPHER_AES_GCM     = 31,           // AES Galois/Counter Mode (Galois Message Authentication Code - GMAC)
+    RRES_CIPHER_XTEA        = 40,           // XTEA encryption
+    RRES_CIPHER_BLOWFISH    = 50,           // BLOWFISH encryption
+    RRES_CIPHER_RSA         = 60,           // RSA asymmetric encryption
+    RRES_CIPHER_SALSA20     = 70,           // SALSA20 encryption
+    RRES_CIPHER_CHACHA20    = 71,           // CHACHA20 encryption
+    RRES_CIPHER_XCHACHA20   = 72,           // XCHACHA20 encryption
+    RRES_CIPHER_XCHACHA20_POLY1305 = 73,    // XCHACHA20 with POLY1305 for message authentication (MAC)
+    // TODO: Add additional encryption algorithm if required
+} rresEncryptionType;
+
+// TODO: rres error codes (not used at this moment)
+// NOTE: Error codes when processing rres files
+typedef enum rresErrorType {
+    RRES_SUCCESS = 0,                       // rres file loaded/saved successfully
+    RRES_ERROR_FILE_NOT_FOUND,              // rres file can not be opened (spelling issues, file actually does not exist...)
+    RRES_ERROR_FILE_FORMAT,                 // rres file format not supported (wrong header, wrong identifier)
+    RRES_ERROR_MEMORY_ALLOC,                // Memory could not be allocated for operation.
+} rresErrorType;
+
+// Enums required by specific resource types for its properties
+//----------------------------------------------------------------------------------
+// TEXT: Text encoding property values
+typedef enum rresTextEncoding {
+    RRES_TEXT_ENCODING_UNDEFINED = 0,       // Not defined, usually UTF-8
+    RRES_TEXT_ENCODING_UTF8      = 1,       // UTF-8 text encoding
+    RRES_TEXT_ENCODING_UTF8_BOM  = 2,       // UTF-8 text encoding with Byte-Order-Mark
+    RRES_TEXT_ENCODING_UTF16_LE  = 10,      // UTF-16 Little Endian text encoding
+    RRES_TEXT_ENCODING_UTF16_BE  = 11,      // UTF-16 Big Endian text encoding
+    // TODO: Add additional encodings if required
+} rresTextEncoding;
+
+// TEXT: Text code language
+// NOTE: It could be useful for code script resources
+typedef enum rresCodeLang {
+    RRES_CODE_LANG_UNDEFINED = 0,           // Undefined code language, text is plain text
+    RRES_CODE_LANG_C,                       // Text contains C code
+    RRES_CODE_LANG_CPP,                     // Text contains C++ code
+    RRES_CODE_LANG_CS,                      // Text contains C# code
+    RRES_CODE_LANG_LUA,                     // Text contains Lua code
+    RRES_CODE_LANG_JS,                      // Text contains JavaScript code
+    RRES_CODE_LANG_PYTHON,                  // Text contains Python code
+    RRES_CODE_LANG_RUST,                    // Text contains Rust code
+    RRES_CODE_LANG_ZIG,                     // Text contains Zig code
+    RRES_CODE_LANG_ODIN,                    // Text contains Odin code
+    RRES_CODE_LANG_JAI,                     // Text contains Jai code
+    RRES_CODE_LANG_GDSCRIPT,                // Text contains GDScript (Godot) code
+    RRES_CODE_LANG_GLSL,                    // Text contains GLSL shader code
+    // TODO: Add additional code languages if required
+} rresCodeLang;
+
+// IMGE: Image/Texture pixel formats
+typedef enum rresPixelFormat {
+    RRES_PIXELFORMAT_UNDEFINED = 0,
+    RRES_PIXELFORMAT_UNCOMP_GRAYSCALE = 1,  // 8 bit per pixel (no alpha)
+    RRES_PIXELFORMAT_UNCOMP_GRAY_ALPHA,     // 16 bpp (2 channels)
+    RRES_PIXELFORMAT_UNCOMP_R5G6B5,         // 16 bpp
+    RRES_PIXELFORMAT_UNCOMP_R8G8B8,         // 24 bpp
+    RRES_PIXELFORMAT_UNCOMP_R5G5B5A1,       // 16 bpp (1 bit alpha)
+    RRES_PIXELFORMAT_UNCOMP_R4G4B4A4,       // 16 bpp (4 bit alpha)
+    RRES_PIXELFORMAT_UNCOMP_R8G8B8A8,       // 32 bpp
+    RRES_PIXELFORMAT_UNCOMP_R32,            // 32 bpp (1 channel - float)
+    RRES_PIXELFORMAT_UNCOMP_R32G32B32,      // 32*3 bpp (3 channels - float)
+    RRES_PIXELFORMAT_UNCOMP_R32G32B32A32,   // 32*4 bpp (4 channels - float)
+    RRES_PIXELFORMAT_COMP_DXT1_RGB,         // 4 bpp (no alpha)
+    RRES_PIXELFORMAT_COMP_DXT1_RGBA,        // 4 bpp (1 bit alpha)
+    RRES_PIXELFORMAT_COMP_DXT3_RGBA,        // 8 bpp
+    RRES_PIXELFORMAT_COMP_DXT5_RGBA,        // 8 bpp
+    RRES_PIXELFORMAT_COMP_ETC1_RGB,         // 4 bpp
+    RRES_PIXELFORMAT_COMP_ETC2_RGB,         // 4 bpp
+    RRES_PIXELFORMAT_COMP_ETC2_EAC_RGBA,    // 8 bpp
+    RRES_PIXELFORMAT_COMP_PVRT_RGB,         // 4 bpp
+    RRES_PIXELFORMAT_COMP_PVRT_RGBA,        // 4 bpp
+    RRES_PIXELFORMAT_COMP_ASTC_4x4_RGBA,    // 8 bpp
+    RRES_PIXELFORMAT_COMP_ASTC_8x8_RGBA     // 2 bpp
+    // TODO: Add additional pixel formats if required
+} rresPixelFormat;
+
+// VRTX: Vertex data attribute
+// NOTE: The expected number of components for every vertex attributes is provided as a property to data,
+// the listed components count are the expected/default ones
+typedef enum rresVertexAttribute {
+    RRES_VERTEX_ATTRIBUTE_POSITION   = 0,   // Vertex position attribute: [x, y, z]
+    RRES_VERTEX_ATTRIBUTE_TEXCOORD1  = 10,  // Vertex texture coordinates attribute: [u, v]
+    RRES_VERTEX_ATTRIBUTE_TEXCOORD2  = 11,  // Vertex texture coordinates attribute: [u, v]
+    RRES_VERTEX_ATTRIBUTE_TEXCOORD3  = 12,  // Vertex texture coordinates attribute: [u, v]
+    RRES_VERTEX_ATTRIBUTE_TEXCOORD4  = 13,  // Vertex texture coordinates attribute: [u, v]
+    RRES_VERTEX_ATTRIBUTE_NORMAL     = 20,  // Vertex normal attribute: [x, y, z]
+    RRES_VERTEX_ATTRIBUTE_TANGENT    = 30,  // Vertex tangent attribute: [x, y, z, w]
+    RRES_VERTEX_ATTRIBUTE_COLOR      = 40,  // Vertex color attribute: [r, g, b, a]
+    RRES_VERTEX_ATTRIBUTE_INDEX      = 100, // Vertex index attribute: [i]
+    // TODO: Add additional attributes if required
+} rresVertexAttribute;
+
+// VRTX: Vertex data format type
+typedef enum rresVertexFormat {
+    RRES_VERTEX_FORMAT_UBYTE = 0,           // 8 bit unsigned integer data
+    RRES_VERTEX_FORMAT_BYTE,                // 8 bit signed integer data
+    RRES_VERTEX_FORMAT_USHORT,              // 16 bit unsigned integer data
+    RRES_VERTEX_FORMAT_SHORT,               // 16 bit signed integer data
+    RRES_VERTEX_FORMAT_UINT,                // 32 bit unsigned integer data
+    RRES_VERTEX_FORMAT_INT,                 // 32 bit signed integer data
+    RRES_VERTEX_FORMAT_HFLOAT,              // 16 bit float data
+    RRES_VERTEX_FORMAT_FLOAT,               // 32 bit float data
+    // TODO: Add additional required vertex formats (i.e. normalized data)
+} rresVertexFormat;
+
+// FNTG: Font style
+typedef enum rresFontStyle {
+    RRES_FONT_STYLE_UNDEFINED = 0,          // Undefined font style
+    RRES_FONT_STYLE_REGULAR,                // Regular font style
+    RRES_FONT_STYLE_BOLD,                   // Bold font style
+    RRES_FONT_STYLE_ITALIC,                 // Italic font style
+    // TODO: Add additional font styles if required
+} rresFontStyle;
+
+//----------------------------------------------------------------------------------
+// Global variables
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+#ifdef __cplusplus
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+// Load only one resource chunk (first resource id found)
+RRESAPI rresResourceChunk rresLoadResourceChunk(const char *fileName, int rresId);  // Load one resource chunk for provided id
+RRESAPI void rresUnloadResourceChunk(rresResourceChunk chunk);                      // Unload resource chunk from memory
+
+// Load multi resource chunks for a specified rresId
+RRESAPI rresResourceMulti rresLoadResourceMulti(const char *fileName, int rresId);  // Load resource for provided id (multiple resource chunks)
+RRESAPI void rresUnloadResourceMulti(rresResourceMulti multi);                      // Unload resource from memory (multiple resource chunks)
+
+// Load resource(s) chunk info from file
+RRESAPI rresResourceChunkInfo rresLoadResourceChunkInfo(const char *fileName, int rresId);  // Load resource chunk info for provided id
+RRESAPI rresResourceChunkInfo *rresLoadResourceChunkInfoAll(const char *fileName, unsigned int *chunkCount); // Load all resource chunks info
+
+RRESAPI rresCentralDir rresLoadCentralDirectory(const char *fileName);              // Load central directory resource chunk from file
+RRESAPI void rresUnloadCentralDirectory(rresCentralDir dir);                        // Unload central directory resource chunk
+
+RRESAPI unsigned int rresGetDataType(const unsigned char *fourCC);                  // Get rresResourceDataType from FourCC code
+RRESAPI int rresGetResourceId(rresCentralDir dir, const char *fileName);            // Get resource id for a provided filename
+                                                                                    // NOTE: It requires CDIR available in the file (it's optional by design)
+RRESAPI unsigned int rresComputeCRC32(unsigned char *data, int len);                // Compute CRC32 for provided data
+
+// Manage password for data encryption/decryption
+// NOTE: The cipher password is kept as an internal pointer to provided string, it's up to the user to manage that sensitive data properly
+// Password should be allocated and set before loading an encrypted resource and it should be cleaned/wiped after the encrypted resource has been loaded
+// TODO: Move this functionality to engine-library, after all rres.h does not manage data decryption
+RRESAPI void rresSetCipherPassword(const char *pass);                 // Set password to be used on data decryption
+RRESAPI const char *rresGetCipherPassword(void);                      // Get password to be used on data decryption
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // RRES_H
+
+
+/***********************************************************************************
+*
+*   RRES IMPLEMENTATION
+*
+************************************************************************************/
+
+#if defined(RRES_IMPLEMENTATION)
+
+// Boolean type
+#if (defined(__STDC__) && __STDC_VERSION__ >= 199901L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+    #include <stdbool.h>
+#elif !defined(__cplusplus) && !defined(bool)
+    typedef enum bool { false = 0, true = !false } bool;
+    #define RL_BOOL_TYPE
+#endif
+
+#include <stdlib.h>                 // Required for: malloc(), free()
+#include <stdio.h>                  // Required for: FILE, fopen(), fseek(), fread(), fclose()
+#include <string.h>                 // Required for: memcpy(), memcmp()
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+// Password pointer, managed by user libraries
+// NOTE: Starts as NULL; it is assigned by rresSetCipherPassword() (pointer only, no copy)
+// or by rresGetCipherPassword(), which installs a default string when it is still NULL
+static const char *password = NULL;     // Password pointer, managed by user libraries
+
+//----------------------------------------------------------------------------------
+// Module Internal Functions Declaration
+//----------------------------------------------------------------------------------
+// Load resource chunk packed data into our data struct
+static rresResourceChunkData rresLoadResourceChunkData(rresResourceChunkInfo info, void *packedData);
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+// Load one resource chunk for provided id
+// NOTE: Only the first chunk matching the id is loaded, linked chunks are only reported
+// The returned chunk is zero-initialized when the file can not be opened, it is not a valid
+// rres file or the requested id is not found; chunk.info is filled but chunk.data is left
+// empty when the packed data can not be allocated or read from the file
+rresResourceChunk rresLoadResourceChunk(const char *fileName, int rresId)
+{
+    rresResourceChunk chunk = { 0 };
+
+    FILE *rresFile = fopen(fileName, "rb");
+
+    if (rresFile == NULL) RRES_LOG("RRES: WARNING: [%s] rres file could not be opened\n", fileName);
+    else
+    {
+        RRES_LOG("RRES: INFO: Loading resource from file: %s\n", fileName);
+
+        rresFileHeader header = { 0 };
+
+        // Read rres file header, a truncated header is handled as a not valid file
+        bool headerRead = (fread(&header, sizeof(rresFileHeader), 1, rresFile) == 1);
+
+        // Verify file signature: "rres" and file version: 100
+        if (headerRead && ((header.id[0] == 'r') && (header.id[1] == 'r') && (header.id[2] == 'e') && (header.id[3] == 's')) && (header.version == 100))
+        {
+            bool found = false;
+
+            // Check all available chunks looking for the requested id
+            for (int i = 0; i < header.chunkCount; i++)
+            {
+                rresResourceChunkInfo info = { 0 };
+
+                // Read resource info header, stop scanning if the file ends unexpectedly
+                if (fread(&info, sizeof(rresResourceChunkInfo), 1, rresFile) != 1)
+                {
+                    RRES_LOG("RRES: WARNING: Unexpected end of file reading resource chunk info\n");
+                    break;
+                }
+
+                // Check if resource id is the requested one
+                if (info.id == rresId)
+                {
+                    found = true;
+
+                    RRES_LOG("RRES: INFO: Found requested resource id: 0x%08x\n", info.id);
+                    RRES_LOG("RRES: %c%c%c%c: Id: 0x%08x | Base size: %i | Packed size: %i\n", info.type[0], info.type[1], info.type[2], info.type[3], info.id, info.baseSize, info.packedSize);
+
+                    // NOTE: We only load first matching id resource chunk found but
+                    // we show a message if additional chunks are detected
+                    if (info.nextOffset != 0) RRES_LOG("RRES: WARNING: Multiple linked resource chunks available for the provided id\n");
+
+                    chunk.info = info;
+
+                    // Read resource chunk packed data from file
+                    // NOTE: Read data can be compressed/encrypted, it's up to the user library to manage decompression/decryption
+                    if (info.packedSize > 0)
+                    {
+                        void *data = RRES_MALLOC(info.packedSize);    // Allocate enough memory to store resource data chunk
+
+                        if (data == NULL) RRES_LOG("RRES: WARNING: [ID %i] Resource chunk data could not be allocated\n", info.id);
+                        else
+                        {
+                            // Read data: propsCount + props[] + data (+additional_data)
+                            // Get chunk.data properly organized (only if uncompressed/unencrypted)
+                            if (fread(data, info.packedSize, 1, rresFile) == 1) chunk.data = rresLoadResourceChunkData(info, data);
+                            else RRES_LOG("RRES: WARNING: [ID %i] Resource chunk data could not be read\n", info.id);
+
+                            RRES_FREE(data);
+                        }
+                    }
+
+                    break;      // Resource id found and loaded, stop checking the file
+                }
+                else
+                {
+                    // Skip required data size to read next resource info header
+                    fseek(rresFile, info.packedSize, SEEK_CUR);
+                }
+            }
+
+            if (!found) RRES_LOG("RRES: WARNING: Requested resource not found: 0x%08x\n", rresId);
+        }
+        else RRES_LOG("RRES: WARNING: The provided file is not a valid rres file, file signature or version not valid\n");
+
+        fclose(rresFile);
+    }
+
+    return chunk;
+}
+
+// Release the memory owned by a loaded resource chunk
+// NOTE: The chunk is received by value, so the caller's copy keeps
+// its pointers after this call and they must not be used again
+void rresUnloadResourceChunk(rresResourceChunk chunk)
+{
+    rresResourceChunkData *loaded = &chunk.data;
+
+    RRES_FREE(loaded->props);   // Properties array
+    RRES_FREE(loaded->raw);     // Raw data buffer
+}
+
+// Read the packed data of one chunk from the current file position and organize it
+// NOTE: Returned chunk always carries the provided info, chunk.data is left empty
+// when there is no packed data or it can not be allocated/read
+static rresResourceChunk rresReadPackedChunk(FILE *rresFile, rresResourceChunkInfo info)
+{
+    rresResourceChunk chunk = { 0 };
+
+    chunk.info = info;
+
+    if (info.packedSize > 0)
+    {
+        // NOTE: Read data can be compressed/encrypted,
+        // it's up to the user library to manage decompression/decryption
+        void *data = RRES_MALLOC(info.packedSize);      // Allocate enough memory to store resource data chunk
+
+        if (data == NULL) RRES_LOG("RRES: WARNING: [ID %i] Resource chunk data could not be allocated\n", info.id);
+        else
+        {
+            // Read data: propsCount + props[] + data (+additional_data)
+            // Get chunk.data properly organized (only if uncompressed/unencrypted)
+            if (fread(data, info.packedSize, 1, rresFile) == 1) chunk.data = rresLoadResourceChunkData(info, data);
+            else RRES_LOG("RRES: WARNING: [ID %i] Resource chunk data could not be read\n", info.id);
+
+            RRES_FREE(data);
+        }
+    }
+
+    return chunk;
+}
+
+// Load resource from file by id
+// NOTE: All resources connected to base id are loaded, following every chunk nextOffset
+// Returned rres.count is the number of chunks actually loaded into rres.chunks,
+// it is 0 (and rres.chunks is NULL) when the file is not valid or the id is not found
+rresResourceMulti rresLoadResourceMulti(const char *fileName, int rresId)
+{
+    rresResourceMulti rres = { 0 };
+
+    FILE *rresFile = fopen(fileName, "rb");
+
+    if (rresFile == NULL) RRES_LOG("RRES: WARNING: [%s] rres file could not be opened\n", fileName);
+    else
+    {
+        rresFileHeader header = { 0 };
+
+        // Read rres file header, a truncated header is handled as a not valid file
+        bool headerRead = (fread(&header, sizeof(rresFileHeader), 1, rresFile) == 1);
+
+        // Verify file signature: "rres" and file version: 100
+        if (headerRead && ((header.id[0] == 'r') && (header.id[1] == 'r') && (header.id[2] == 'e') && (header.id[3] == 's')) && (header.version == 100))
+        {
+            bool found = false;
+
+            // Check all available chunks looking for the requested id
+            for (int i = 0; i < header.chunkCount; i++)
+            {
+                rresResourceChunkInfo info = { 0 };
+
+                // Read resource info header, stop scanning if the file ends unexpectedly
+                if (fread(&info, sizeof(rresResourceChunkInfo), 1, rresFile) != 1)
+                {
+                    RRES_LOG("RRES: WARNING: Unexpected end of file reading resource chunk info\n");
+                    break;
+                }
+
+                // Check if resource id is the requested one
+                if (info.id == rresId)
+                {
+                    found = true;
+
+                    RRES_LOG("RRES: INFO: Found requested resource id: 0x%08x\n", info.id);
+                    RRES_LOG("RRES: %c%c%c%c: Id: 0x%08x | Base size: %i | Packed size: %i\n", info.type[0], info.type[1], info.type[2], info.type[3], info.id, info.baseSize, info.packedSize);
+
+                    unsigned int linkedCount = 1;
+
+                    long currentFileOffset = ftell(rresFile);               // Store current file position
+                    rresResourceChunkInfo temp = info;                      // Temp info header to scan resource chunks
+
+                    // Count all linked resource chunks checking temp.nextOffset
+                    // NOTE: A chain can not be longer than the chunks in the file, that limit
+                    // stops cyclic chains; a failed seek/read stops the scan instead of looping forever
+                    while ((temp.nextOffset != 0) && (linkedCount < (unsigned int)header.chunkCount))
+                    {
+                        if ((fseek(rresFile, temp.nextOffset, SEEK_SET) != 0) ||            // Jump to next linked resource
+                            (fread(&temp, sizeof(rresResourceChunkInfo), 1, rresFile) != 1)) // Read next resource info header
+                        {
+                            RRES_LOG("RRES: WARNING: Linked resource chunk info could not be read\n");
+                            break;
+                        }
+
+                        linkedCount++;
+                    }
+
+                    rres.chunks = (rresResourceChunk *)RRES_CALLOC(linkedCount, sizeof(rresResourceChunk)); // Load as many rres slots as required
+
+                    if (rres.chunks == NULL)
+                    {
+                        RRES_LOG("RRES: WARNING: Resource chunks could not be allocated\n");
+                        break;
+                    }
+
+                    fseek(rresFile, currentFileOffset, SEEK_SET);           // Return to first resource chunk position
+
+                    // Read and load first data chunk from file data
+                    rres.chunks[0] = rresReadPackedChunk(rresFile, info);
+
+                    unsigned int loaded = 1;
+
+                    // Load all linked resource chunks, never writing past the allocated slots
+                    while ((info.nextOffset != 0) && (loaded < linkedCount))
+                    {
+                        if ((fseek(rresFile, info.nextOffset, SEEK_SET) != 0) ||            // Jump to next resource chunk
+                            (fread(&info, sizeof(rresResourceChunkInfo), 1, rresFile) != 1)) // Read next resource info header
+                        {
+                            RRES_LOG("RRES: WARNING: Linked resource chunk info could not be read\n");
+                            break;
+                        }
+
+                        RRES_LOG("RRES: %c%c%c%c: Id: 0x%08x | Base size: %i | Packed size: %i\n", info.type[0], info.type[1], info.type[2], info.type[3], info.id, info.baseSize, info.packedSize);
+
+                        rres.chunks[loaded] = rresReadPackedChunk(rresFile, info);
+                        loaded++;
+                    }
+
+                    rres.count = loaded;    // Only report the chunks actually stored
+
+                    break;      // Resource id found and loaded, stop checking the file
+                }
+                else
+                {
+                    // Skip required data size to read next resource info header
+                    fseek(rresFile, info.packedSize, SEEK_CUR);
+                }
+            }
+
+            if (!found) RRES_LOG("RRES: WARNING: Requested resource not found: 0x%08x\n", rresId);
+        }
+        else RRES_LOG("RRES: WARNING: The provided file is not a valid rres file, file signature or version not valid\n");
+
+        fclose(rresFile);
+    }
+
+    return rres;
+}
+
+// Release every chunk of a multi-chunk resource, then the chunks array itself
+void rresUnloadResourceMulti(rresResourceMulti multi)
+{
+    rresResourceChunk *current = multi.chunks;
+    unsigned int remaining = multi.count;
+
+    // Walk the array front to back, unloading one chunk per step
+    while (remaining > 0)
+    {
+        rresUnloadResourceChunk(*current);
+        current++;
+        remaining--;
+    }
+
+    RRES_FREE(multi.chunks);
+}
+
+// Load resource chunk info for provided id
+// NOTE: Only the info header is read, chunk data is not loaded
+// Returned info is zero-initialized when the file can not be opened, it is not
+// a valid rres file or no chunk with the requested id is found
+RRESAPI rresResourceChunkInfo rresLoadResourceChunkInfo(const char *fileName, int rresId)
+{
+    rresResourceChunkInfo info = { 0 };
+
+    FILE *rresFile = fopen(fileName, "rb");
+
+    if (rresFile == NULL) RRES_LOG("RRES: WARNING: [%s] rres file could not be opened\n", fileName);
+    else
+    {
+        rresFileHeader header = { 0 };
+
+        // Read rres file header, a truncated header is handled as a not valid file
+        bool headerRead = (fread(&header, sizeof(rresFileHeader), 1, rresFile) == 1);
+
+        // Verify file signature: "rres", file version: 100
+        if (headerRead && ((header.id[0] == 'r') && (header.id[1] == 'r') && (header.id[2] == 'e') && (header.id[3] == 's')) && (header.version == 100))
+        {
+            // Try to find provided resource chunk id and read info chunk
+            for (int i = 0; i < header.chunkCount; i++)
+            {
+                // Read resource chunk info into a scratch variable, so a not matching
+                // (or partially read) chunk info is never returned to the caller
+                rresResourceChunkInfo current = { 0 };
+
+                if (fread(&current, sizeof(rresResourceChunkInfo), 1, rresFile) != 1) break;
+
+                if (current.id == rresId)
+                {
+                    // TODO: Jump to next resource chunk for provided id
+                    //if (info.nextOffset > 0) fseek(rresFile, info.nextOffset, SEEK_SET);
+
+                    info = current;
+
+                    break; // If requested rresId is found, we return the read rresResourceChunkInfo
+                }
+                else fseek(rresFile, current.packedSize, SEEK_CUR); // Jump to next resource
+            }
+        }
+        else RRES_LOG("RRES: WARNING: The provided file is not a valid rres file, file signature or version not valid\n");
+
+        fclose(rresFile);
+    }
+
+    return info;
+}
+
+// Load all resource chunks info
+// NOTE: Returned array is allocated with RRES_CALLOC() and must be released by the caller,
+// NULL is returned when the file can not be opened/validated or memory can not be allocated
+// chunkCount (optional, can be NULL) receives the number of infos actually read from file
+RRESAPI rresResourceChunkInfo *rresLoadResourceChunkInfoAll(const char *fileName, unsigned int *chunkCount)
+{
+    rresResourceChunkInfo *infos = NULL;
+    unsigned int count = 0;
+
+    FILE *rresFile = fopen(fileName, "rb");
+
+    if (rresFile != NULL)
+    {
+        rresFileHeader header = { 0 };
+
+        // Read rres file header, a truncated header is handled as a not valid file
+        bool headerRead = (fread(&header, sizeof(rresFileHeader), 1, rresFile) == 1);
+
+        // Verify file signature: "rres", file version: 100
+        if (headerRead && ((header.id[0] == 'r') && (header.id[1] == 'r') && (header.id[2] == 'e') && (header.id[3] == 's')) && (header.version == 100))
+        {
+            unsigned int expected = (unsigned int)header.chunkCount;
+
+            // Load all resource chunks info
+            if (expected > 0) infos = (rresResourceChunkInfo *)RRES_CALLOC(expected, sizeof(rresResourceChunkInfo));
+
+            if (infos == NULL)
+            {
+                if (expected > 0) RRES_LOG("RRES: WARNING: Resource chunks info could not be allocated\n");
+            }
+            else
+            {
+                for (unsigned int i = 0; i < expected; i++)
+                {
+                    // Read resource chunk info, stop on truncated file keeping the not read slots zeroed
+                    if (fread(&infos[i], sizeof(rresResourceChunkInfo), 1, rresFile) != 1)
+                    {
+                        memset(&infos[i], 0, sizeof(rresResourceChunkInfo));
+                        RRES_LOG("RRES: WARNING: Unexpected end of file reading resource chunk info\n");
+                        break;
+                    }
+
+                    count++;
+
+                    if (infos[i].nextOffset > 0) fseek(rresFile, infos[i].nextOffset, SEEK_SET); // Jump to next resource
+                    else fseek(rresFile, infos[i].packedSize, SEEK_CUR); // Jump to next resource
+                }
+            }
+        }
+        else RRES_LOG("RRES: WARNING: The provided file is not a valid rres file, file signature or version not valid\n");
+
+        fclose(rresFile);
+    }
+
+    if (chunkCount != NULL) *chunkCount = count;
+
+    return infos;
+}
+
+// Load central directory data
+// NOTE: Returned directory is empty (count = 0) when the file has no valid central
+// directory chunk or its data can not be loaded; dir.count only considers the entries
+// successfully parsed, dir.entries must be released with rresUnloadCentralDirectory()
+rresCentralDir rresLoadCentralDirectory(const char *fileName)
+{
+    rresCentralDir dir = { 0 };
+
+    FILE *rresFile = fopen(fileName, "rb");
+
+    if (rresFile != NULL)
+    {
+        rresFileHeader header = { 0 };
+
+        // Read rres file header, a truncated header is handled as a not valid file
+        bool headerRead = (fread(&header, sizeof(rresFileHeader), 1, rresFile) == 1);
+
+        // Verify file signature: "rres", file version: 100
+        if (headerRead && ((header.id[0] == 'r') && (header.id[1] == 'r') && (header.id[2] == 'e') && (header.id[3] == 's')) && (header.version == 100))
+        {
+            // Check if there is a Central Directory available
+            if (header.cdOffset == 0) RRES_LOG("RRES: WARNING: CDIR: No central directory found\n");
+            else
+            {
+                rresResourceChunkInfo info = { 0 };
+
+                // Move to central directory position and read resource info
+                bool infoRead = (fseek(rresFile, header.cdOffset, SEEK_CUR) == 0) &&
+                                (fread(&info, sizeof(rresResourceChunkInfo), 1, rresFile) == 1);
+
+                // Verify resource type is CDIR
+                if (infoRead && (info.type[0] == 'C') && (info.type[1] == 'D') && (info.type[2] == 'I') && (info.type[3] == 'R'))
+                {
+                    RRES_LOG("RRES: CDIR: Central Directory found at offset: 0x%08x\n", header.cdOffset);
+
+                    rresResourceChunkData chunkData = { 0 };
+                    void *data = (info.packedSize > 0)? RRES_MALLOC(info.packedSize) : NULL;
+
+                    if (data != NULL)
+                    {
+                        // Load resource chunk data (central directory), data is uncompressed/unencrypted by default
+                        if (fread(data, info.packedSize, 1, rresFile) == 1) chunkData = rresLoadResourceChunkData(info, data);
+
+                        RRES_FREE(data);
+                    }
+
+                    // NOTE: props/raw are not available if data failed to load or the CRC32 validation failed
+                    if ((chunkData.props == NULL) || (chunkData.raw == NULL)) RRES_LOG("RRES: WARNING: CDIR: Central directory data could not be loaded\n");
+                    else
+                    {
+                        unsigned int entryCount = chunkData.props[0];   // File entries count
+
+                        RRES_LOG("RRES: CDIR: Central Directory file entries count: %i\n", (int)entryCount);
+
+                        dir.entries = (rresDirEntry *)RRES_CALLOC(entryCount, sizeof(rresDirEntry));
+
+                        if (dir.entries != NULL)
+                        {
+                            const unsigned char *ptr = (const unsigned char *)chunkData.raw;
+                            size_t remaining = info.baseSize;   // Bytes allocated for chunkData.raw
+                            unsigned int parsed = 0;
+
+                            for (; parsed < entryCount; parsed++)
+                            {
+                                // Entry header: id, offset, reserved, fileNameSize
+                                // NOTE: Layout expects 4-byte int values (16 bytes header)
+                                int fields[4] = { 0 };
+
+                                if (remaining < sizeof(fields)) break;          // Entry header out of data bounds
+
+                                memcpy(fields, ptr, sizeof(fields));
+
+                                size_t nameSize = (unsigned int)fields[3];
+
+                                if (nameSize > (remaining - sizeof(fields))) break;    // Entry fileName out of data bounds
+
+                                dir.entries[parsed].id = fields[0];             // Resource id
+                                dir.entries[parsed].offset = fields[1];         // Resource offset in file
+                                // NOTE: There is a reserved integer value before fileNameSize
+                                dir.entries[parsed].fileNameSize = fields[3];   // Resource fileName size
+
+                                // Resource fileName, NULL terminated and 0-padded to 4-byte,
+                                // fileNameSize considers NULL and padding
+                                // NOTE: Copy is limited to the destination buffer, keeping its last
+                                // byte zeroed (entries are zero-allocated) so fileName is always terminated
+                                size_t copySize = sizeof(dir.entries[parsed].fileName) - 1;
+                                if (nameSize < copySize) copySize = nameSize;
+
+                                memcpy(dir.entries[parsed].fileName, ptr + sizeof(fields), copySize);
+
+                                ptr += (sizeof(fields) + nameSize);             // Move pointer for next entry
+                                remaining -= (sizeof(fields) + nameSize);
+                            }
+
+                            if (parsed < entryCount) RRES_LOG("RRES: WARNING: CDIR: Central directory entries data is truncated\n");
+
+                            dir.count = parsed;
+                        }
+                    }
+
+                    RRES_FREE(chunkData.props);
+                    RRES_FREE(chunkData.raw);
+                }
+            }
+        }
+        else RRES_LOG("RRES: WARNING: The provided file is not a valid rres file, file signature or version not valid\n");
+
+        fclose(rresFile);
+    }
+
+    return dir;
+}
+
+// Unload central directory data
+// NOTE: Only the entries array is released; dir is received by value,
+// so the caller's copy keeps its entries pointer and must not use it afterwards
+void rresUnloadCentralDirectory(rresCentralDir dir)
+{
+    RRES_FREE(dir.entries);
+}
+
+// Get rresResourceDataType from FourCC code
+// NOTE: Function expects to receive a char[4] array, only the first 4 bytes are compared
+// Returns 0 when fourCC is NULL or the code is not one of the known ones
+unsigned int rresGetDataType(const unsigned char *fourCC)
+{
+    // Known FourCC codes and the data type assigned to each of them
+    static const struct { const char *code; unsigned int type; } knownTypes[] = {
+        { "NULL", RRES_DATA_NULL },         // Reserved for empty chunks, no props/data
+        { "RAWD", RRES_DATA_RAW },          // Raw file data, input file is not processed, just packed as is
+        { "TEXT", RRES_DATA_TEXT },         // Text file data, byte data extracted from text file
+        { "IMGE", RRES_DATA_IMAGE },        // Image file data, pixel data extracted from image file
+        { "WAVE", RRES_DATA_WAVE },         // Audio file data, samples data extracted from audio file
+        { "VRTX", RRES_DATA_VERTEX },       // Vertex file data, extracted from a mesh file
+        { "FNTG", RRES_DATA_FONT_GLYPHS },  // Font glyphs info, generated from an input font file
+        { "LINK", RRES_DATA_LINK },         // External linked file, filepath as provided on file input
+        { "CDIR", RRES_DATA_DIRECTORY }     // Central directory for input files relation to resource chunks
+    };
+
+    unsigned int type = 0;
+
+    if (fourCC == NULL) return type;
+
+    // Look for the first entry matching the provided code
+    for (unsigned int k = 0; k < sizeof(knownTypes)/sizeof(knownTypes[0]); k++)
+    {
+        if (memcmp(fourCC, knownTypes[k].code, 4) == 0)
+        {
+            type = knownTypes[k].type;
+            break;
+        }
+    }
+
+    return type;
+}
+
+// Get resource identifier from filename
+// WARNING: It requires the central directory previously loaded
+// NOTE: fileName must match a directory entry name exactly, the first matching entry is used
+// Returns 0 when fileName is NULL, the directory has no entries or no entry matches
+int rresGetResourceId(rresCentralDir dir, const char *fileName)
+{
+    int id = 0;
+
+    if ((fileName == NULL) || (dir.entries == NULL)) return id;
+
+    // Compare the terminating character too: comparing only strlen(fileName) chars
+    // would also accept entries that just start with fileName
+    size_t compareSize = strlen(fileName) + 1;
+
+    for (unsigned int i = 0; i < dir.count; i++)
+    {
+        // NOTE: entries[i].fileName is NULL terminated and padded to 4-bytes
+        if (strncmp((const char *)dir.entries[i].fileName, fileName, compareSize) == 0)
+        {
+            id = dir.entries[i].id;
+            break;
+        }
+    }
+
+    return id;
+}
+
+// Compute CRC32 hash of the first len bytes of data
+// NOTE: CRC32 is used as rres id, generated from original filename
+// A len of 0 (or negative) processes no bytes and returns 0
+unsigned int rresComputeCRC32(unsigned char *data, int len)
+{
+    // Precomputed lookup table, one entry per possible byte value
+    static unsigned int crcTable[256] = {
+        0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
+        0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
+        0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+        0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
+        0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
+        0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+        0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
+        0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
+        0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+        0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
+        0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
+        0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+        0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
+        0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
+        0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+        0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
+        0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
+        0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+        0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
+        0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
+        0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+        0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
+        0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
+        0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+        0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
+        0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
+        0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+        0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
+        0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
+        0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+        0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
+        0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+    };
+
+    unsigned int crc = ~0u;             // Start with all bits set
+    const unsigned char *cursor = data;
+    int remaining = len;
+
+    while (remaining > 0)
+    {
+        // Table index mixes the current byte with the low byte of the running value
+        unsigned int slot = (*cursor ^ crc) & 0xff;
+
+        crc = crcTable[slot] ^ (crc >> 8);
+
+        cursor++;
+        remaining--;
+    }
+
+    return crc ^ ~0u;                   // Final bit inversion
+}
+
+// Set password to be used on data decryption
+// NOTE: Only the pointer is stored, the string is not copied, so the provided
+// string must stay valid for as long as the password can be requested
+void rresSetCipherPassword(const char *pass)
+{
+    password = pass;
+}
+
+// Get password to be used on data decryption
+// NOTE: When no password has been set, the internal pointer is switched to a
+// built-in default string, and that default is what gets returned from then on
+const char *rresGetCipherPassword(void)
+{
+    if (password != NULL) return password;
+
+    password = "password12345";     // No password available, fall back to default one
+
+    return password;
+}
+
+//----------------------------------------------------------------------------------
+// Module Internal Functions Definition
+//----------------------------------------------------------------------------------
+// Load user resource chunk from resource packed data (as contained in .rres file)
+// WARNING: Data can be compressed and/or encrypted, in those cases is up to the user to process it,
+// and chunk.data.propCount = 0, chunk.data.props = NULL and chunk.data.raw contains all resource packed data
+// NOTE: data is expected to point to info.packedSize bytes; returned data is empty (zeroed) when
+// data is NULL, chunk type is NULL, CRC32 does not match or packed data layout is not valid
+static rresResourceChunkData rresLoadResourceChunkData(rresResourceChunkInfo info, void *data)
+{
+    rresResourceChunkData chunkData = { 0 };
+
+    if (data == NULL) return chunkData;     // No packed data provided, nothing to organize
+
+    // CRC32 data validation, verify packed data is not corrupted
+    unsigned int crc32 = rresComputeCRC32((unsigned char *)data, info.packedSize);
+
+    if ((rresGetDataType(info.type) != RRES_DATA_NULL) && (crc32 == info.crc32))   // Make sure chunk contains data and data is not corrupted
+    {
+        // Check if data chunk is compressed/encrypted to retrieve properties + data
+        if ((info.compType == RRES_COMP_NONE) && (info.cipherType == RRES_CIPHER_NONE))
+        {
+            // Data is not compressed/encrypted (info.packedSize = info.baseSize)
+            // Packed data layout: propCount + props[] + data
+            size_t packedSize = info.packedSize;
+            unsigned int propCount = 0;
+            bool layoutValid = (packedSize >= sizeof(unsigned int));
+
+            if (layoutValid)
+            {
+                propCount = ((unsigned int *)data)[0];
+
+                // Properties must fit in the packed data, propCount comes from the file
+                layoutValid = (propCount <= (packedSize - sizeof(unsigned int))/sizeof(unsigned int));
+            }
+
+            if (!layoutValid) RRES_LOG("RRES: WARNING: [ID %i] Properties do not fit in packed data, data can be corrupted\n", info.id);
+            else
+            {
+                if (propCount > 0)
+                {
+                    chunkData.props = (unsigned int *)RRES_CALLOC(propCount, sizeof(unsigned int));
+
+                    if (chunkData.props != NULL)
+                    {
+                        chunkData.propCount = propCount;
+                        for (unsigned int i = 0; i < propCount; i++) chunkData.props[i] = ((unsigned int *)data)[i + 1];
+                    }
+                    else RRES_LOG("RRES: WARNING: [ID %i] Properties could not be allocated\n", info.id);
+                }
+
+                // Raw data starts just after the properties
+                // NOTE: raw keeps being allocated with info.baseSize bytes (zero-initialized),
+                // but only the bytes actually available in the packed data are copied into it
+                size_t dataOffset = sizeof(unsigned int)*((size_t)propCount + 1);
+                size_t available = packedSize - dataOffset;
+                size_t copySize = (info.baseSize < available)? info.baseSize : available;
+
+                if (info.baseSize > 0)
+                {
+                    chunkData.raw = RRES_CALLOC(info.baseSize, 1);
+
+                    if (chunkData.raw != NULL) memcpy(chunkData.raw, ((unsigned char *)data) + dataOffset, copySize);
+                    else RRES_LOG("RRES: WARNING: [ID %i] Raw data could not be allocated\n", info.id);
+                }
+            }
+        }
+        else
+        {
+            // Data is compressed/encrypted
+            // We just return the loaded resource packed data from .rres file,
+            // it's up to the user to manage decompression/decryption on user library
+            if (info.packedSize > 0)
+            {
+                chunkData.raw = RRES_MALLOC(info.packedSize);
+
+                if (chunkData.raw != NULL) memcpy(chunkData.raw, (unsigned char *)data, info.packedSize);
+                else RRES_LOG("RRES: WARNING: [ID %i] Raw data could not be allocated\n", info.id);
+            }
+        }
+    }
+
+    if (crc32 != info.crc32) RRES_LOG("RRES: WARNING: [ID %i] CRC32 does not match, data can be corrupted\n", info.id);
+
+    return chunkData;
+}
+
+#endif // RRES_IMPLEMENTATION
diff --git a/include/extern/stb_c_lexer.h b/include/extern/stb_c_lexer.h
new file mode 100644
index 0000000..fd42f1c
--- /dev/null
+++ b/include/extern/stb_c_lexer.h
@@ -0,0 +1,941 @@
+// stb_c_lexer.h - v0.12 - public domain Sean Barrett 2013
+// lexer for making little C-like languages with recursive-descent parsers
+//
+// This file provides both the interface and the implementation.
+// To instantiate the implementation,
+//      #define STB_C_LEXER_IMPLEMENTATION
+// in *ONE* source file, before #including this file.
+//
+// The default configuration is fairly close to a C lexer, although
+// suffixes on integer constants are not handled (you can override this).
+//
+// History:
+//     0.12 fix compilation bug for NUL support; better support separate inclusion
+//     0.11 fix clang static analysis warning
+//     0.10 fix warnings
+//     0.09 hex floats, no-stdlib fixes
+//     0.08 fix bad pointer comparison
+//     0.07 fix mishandling of hexadecimal constants parsed by strtol
+//     0.06 fix missing next character after ending quote mark (Andreas Fredriksson)
+//     0.05 refixed get_location because github version had lost the fix
+//     0.04 fix octal parsing bug
+//     0.03 added STB_C_LEX_DISCARD_PREPROCESSOR option
+//          refactor API to simplify (only one struct instead of two)
+//          change literal enum names to have 'lit' at the end
+//     0.02 first public release
+//
+// Status:
+//     - haven't tested compiling as C++
+//     - haven't tested the float parsing path
+//     - haven't tested the non-default-config paths (e.g. non-stdlib)
+//     - only tested default-config paths by eyeballing output of self-parse
+//
+//     - haven't implemented multiline strings
+//     - haven't implemented octal/hex character constants
+//     - haven't implemented support for unicode CLEX_char
+//     - need to expand error reporting so you don't just get "CLEX_parse_error"
+//
+// Contributors:
+//   Arpad Goretity (bugfix)
+//   Alan Hickman (hex floats)
+//   github:mundusnine (bugfix)
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+#ifdef STB_C_LEXER_IMPLEMENTATION
+#ifndef STB_C_LEXER_DEFINITIONS
+// to change the default parsing rules, copy the following lines
+// into your C/C++ file *before* including this, and then replace
+// the Y's with N's for the ones you don't want. This needs to be
+// set to the same values for every place in your program where
+// stb_c_lexer.h is included.
+// --BEGIN--
+
+#if defined(Y) || defined(N)
+#error "Can only use stb_c_lexer in contexts where the preprocessor symbols 'Y' and 'N' are not defined"
+#endif
+
+#define STB_C_LEX_C_DECIMAL_INTS    Y   //  "0|[1-9][0-9]*"                        CLEX_intlit
+#define STB_C_LEX_C_HEX_INTS        Y   //  "0x[0-9a-fA-F]+"                       CLEX_intlit
+#define STB_C_LEX_C_OCTAL_INTS      Y   //  "[0-7]+"                               CLEX_intlit
+#define STB_C_LEX_C_DECIMAL_FLOATS  Y   //  "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?)"    CLEX_floatlit
+#define STB_C_LEX_C99_HEX_FLOATS    N   //  "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+"    CLEX_floatlit
+#define STB_C_LEX_C_IDENTIFIERS     Y   //  "[_a-zA-Z][_a-zA-Z0-9]*"               CLEX_id
+#define STB_C_LEX_C_DQ_STRINGS      Y   //  double-quote-delimited strings with escapes  CLEX_dqstring
+#define STB_C_LEX_C_SQ_STRINGS      N   //  single-quote-delimited strings with escapes  CLEX_sqstring
+#define STB_C_LEX_C_CHARS           Y   //  single-quote-delimited character with escape CLEX_charlit
+#define STB_C_LEX_C_COMMENTS        Y   //  "/* comment */"
+#define STB_C_LEX_CPP_COMMENTS      Y   //  "// comment to end of line\n"
+#define STB_C_LEX_C_COMPARISONS     Y   //  "==" CLEX_eq  "!=" CLEX_noteq   "<=" CLEX_lesseq  ">=" CLEX_greatereq
+#define STB_C_LEX_C_LOGICAL         Y   //  "&&"  CLEX_andand   "||"  CLEX_oror
+#define STB_C_LEX_C_SHIFTS          Y   //  "<<"  CLEX_shl      ">>"  CLEX_shr
+#define STB_C_LEX_C_INCREMENTS      Y   //  "++"  CLEX_plusplus "--"  CLEX_minusminus
+#define STB_C_LEX_C_ARROW           Y   //  "->"  CLEX_arrow
+#define STB_C_LEX_EQUAL_ARROW       N   //  "=>"  CLEX_eqarrow
+#define STB_C_LEX_C_BITWISEEQ       Y   //  "&="  CLEX_andeq    "|="  CLEX_oreq     "^="  CLEX_xoreq
+#define STB_C_LEX_C_ARITHEQ         Y   //  "+="  CLEX_pluseq   "-="  CLEX_minuseq
+                                        //  "*="  CLEX_muleq    "/="  CLEX_diveq    "%=" CLEX_modeq
+                                        //  if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
+                                        //                      "<<=" CLEX_shleq    ">>=" CLEX_shreq
+
+#define STB_C_LEX_PARSE_SUFFIXES    N   // letters after numbers are parsed as part of those numbers, and must be in suffix list below
+#define STB_C_LEX_DECIMAL_SUFFIXES  ""  // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
+#define STB_C_LEX_HEX_SUFFIXES      ""  // e.g. "uUlL"
+#define STB_C_LEX_OCTAL_SUFFIXES    ""  // e.g. "uUlL"
+#define STB_C_LEX_FLOAT_SUFFIXES    ""  //
+
+#define STB_C_LEX_0_IS_EOF             N  // if Y, ends parsing at '\0'; if N, returns '\0' as token
+#define STB_C_LEX_INTEGERS_AS_DOUBLES  N  // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N
+#define STB_C_LEX_MULTILINE_DSTRINGS   N  // allow newlines in double-quoted strings
+#define STB_C_LEX_MULTILINE_SSTRINGS   N  // allow newlines in single-quoted strings
+#define STB_C_LEX_USE_STDLIB           Y  // use strtod,strtol for parsing #s; otherwise inaccurate hack
+#define STB_C_LEX_DOLLAR_IDENTIFIER    Y  // allow $ as an identifier character
+#define STB_C_LEX_FLOAT_NO_DECIMAL     Y  // allow floats that have no decimal point if they have an exponent
+
+#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES  N   // if Y, all CLEX_ token names are defined, even if never returned
+                                              // leaving it as N should help you catch config bugs
+
+#define STB_C_LEX_DISCARD_PREPROCESSOR    Y   // discard C-preprocessor directives (e.g. after preprocessing
+                                              // you can still have #line, #pragma, etc)
+
+//#define STB_C_LEX_ISWHITE(str)    ... // return length in bytes of whitespace characters if first char is whitespace
+
+#define STB_C_LEXER_DEFINITIONS         // This line prevents the header file from replacing your definitions
+// --END--
+#endif
+#endif
+
+#ifndef INCLUDE_STB_C_LEXER_H
+#define INCLUDE_STB_C_LEXER_H
+
+typedef struct
+{
+   // lexer variables
+   char *input_stream;       // start of the text being lexed, as passed to stb_c_lexer_init
+   char *eof;                // input_stream_end as passed to stb_c_lexer_init (NULL if you use 0-for-EOF)
+   char *parse_point;        // where the next stb_c_lexer_get_token call starts reading
+   char *string_storage;     // caller-provided buffer for parsed strings and identifiers
+   int   string_storage_len; // length of string_storage
+
+   // lexer parse location for error messages
+   char *where_firstchar;    // first character of the most recently returned token
+   char *where_lastchar;     // last character of the most recently returned token
+
+   // lexer token variables
+   long token;               // ID of the most recently returned token (a character value or a CLEX_ constant)
+   double real_number;       // value for CLEX_floatlit (or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES)
+   long   int_number;        // value for CLEX_intlit, or the character for CLEX_charlit
+   char *string;             // text for CLEX_dqstring, CLEX_sqstring and CLEX_id
+   int string_len;           // byte length of string
+} stb_lexer;
+
+typedef struct
+{
+   int line_number;   // line in the file, counting from 1
+   int line_offset;   // char-offset within that line, counting from 0
+} stb_lex_location;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length);
+// this function initializes the 'lexer' structure
+//   Input:
+//   - input_stream points to the file to parse, loaded into memory
+//   - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF
+//   - string_store is storage the lexer can use for storing parsed strings and identifiers
+//   - store_length is the length of that storage
+
+extern int stb_c_lexer_get_token(stb_lexer *lexer);
+// this function returns non-zero if a token is parsed, or 0 if at EOF
+//   Output:
+//   - lexer->token is the token ID: the character value for a single-char token, or a CLEX_ constant (>= 256) for a multichar token, eof, or error
+//   - lexer->real_number is a double constant value for CLEX_floatlit, or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES
+//   - lexer->int_number is an integer constant for CLEX_intlit if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_charlit
+//   - lexer->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_id
+//   - lexer->string_len is the byte length of lexer->string
+
+extern void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc);
+// this inefficient function returns the line number and character offset of a
+// given location in the file, e.g. lexer->where_firstchar as set by stb_c_lexer_get_token. Because it's inefficient,
+// you should only call it for errors, not for every token.
+// For error messages of invalid tokens, you typically want the location of the start
+// of the token (which caused the token to be invalid). For bugs involving legit
+// tokens, you can report the first or the range.
+//    Output:
+//    - loc->line_number is the line number in the file, counting from 1, of the location
+//    - loc->line_offset is the char-offset in the line, counting from 0, of the location
+
+
+#ifdef __cplusplus
+}
+#endif
+
+enum
+{
+   CLEX_eof = 256,         // first ID above the single-character token range
+   CLEX_parse_error,
+   CLEX_intlit,
+   CLEX_floatlit,
+   CLEX_id,
+   CLEX_dqstring,
+   CLEX_sqstring,
+   CLEX_charlit,
+   CLEX_eq,                // "=="
+   CLEX_noteq,             // "!="
+   CLEX_lesseq,            // "<="
+   CLEX_greatereq,         // ">="
+   CLEX_andand,            // "&&"
+   CLEX_oror,              // "||"
+   CLEX_shl,               // "<<"
+   CLEX_shr,               // ">>"
+   CLEX_plusplus,          // "++"
+   CLEX_minusminus,        // "--"
+   CLEX_pluseq,            // "+="
+   CLEX_minuseq,           // "-="
+   CLEX_muleq,             // "*="
+   CLEX_diveq,             // "/="
+   CLEX_modeq,             // "%="
+   CLEX_andeq,             // "&="
+   CLEX_oreq,              // "|="
+   CLEX_xoreq,             // "^="
+   CLEX_arrow,             // "->"
+   CLEX_eqarrow,           // "=>"
+   CLEX_shleq,             // "<<="
+   CLEX_shreq,             // ">>="
+
+   CLEX_first_unused_token
+};
+#endif // INCLUDE_STB_C_LEXER_H
+
+#ifdef STB_C_LEXER_IMPLEMENTATION
+
+// Hacky definitions so we can easily #if on them
+#define Y(x) 1
+#define N(x) 0
+
+#if STB_C_LEX_INTEGERS_AS_DOUBLES(x)
+typedef double     stb__clex_int;
+#define intfield   real_number
+#define STB__clex_int_as_double
+#else
+typedef long       stb__clex_int;
+#define intfield   int_number
+#endif
+
+// Convert these config options to simple conditional #defines so we can more
+// easily test them once we've changed the meaning of Y/N
+
+#if STB_C_LEX_PARSE_SUFFIXES(x)
+#define STB__clex_parse_suffixes
+#endif
+
+#if STB_C_LEX_C99_HEX_FLOATS(x)
+#define STB__clex_hex_floats
+#endif
+
+#if STB_C_LEX_C_HEX_INTS(x)
+#define STB__clex_hex_ints
+#endif
+
+#if STB_C_LEX_C_DECIMAL_INTS(x)
+#define STB__clex_decimal_ints
+#endif
+
+#if STB_C_LEX_C_OCTAL_INTS(x)
+#define STB__clex_octal_ints
+#endif
+
+#if STB_C_LEX_C_DECIMAL_FLOATS(x)
+#define STB__clex_decimal_floats
+#endif
+
+#if STB_C_LEX_DISCARD_PREPROCESSOR(x)
+#define STB__clex_discard_preprocessor
+#endif
+
+#if STB_C_LEX_USE_STDLIB(x) && (!defined(STB__clex_hex_floats) || __STDC_VERSION__ >= 199901L)
+#define STB__CLEX_use_stdlib
+#include <stdlib.h>
+#endif
+
+// Now for the rest of the file we'll use the basic definition
+// where Y expands to its contents and N expands to nothing
+#undef  Y
+#define Y(a) a
+#undef N
+#define N(a)
+
+// API function: point the lexer at the input text and register the caller's string buffer
+void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length)
+{
+   // the struct stores non-const pointers, hence the casts; the token fields are not touched here
+   lexer->parse_point = lexer->input_stream = (char *) input_stream;
+   lexer->eof = (char *) input_stream_end;
+   lexer->string_storage_len = store_length;
+   lexer->string_storage = string_store;
+}
+
+// API function: convert 'where' (a pointer into the input) into a 1-based line number and 0-based offset
+void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc)
+{
+   char *p = lexer->input_stream;
+   int line_number = 1, char_offset = 0;
+   // rescans from the start of the input; stops at 'where', at a 0 byte, or at lexer->eof (never reads past it)
+   while (p != lexer->eof && *p && p < where) {
+      if (*p == '\n' || *p == '\r') {
+         p += (p+1 != lexer->eof && p[0]+p[1] == '\r'+'\n' ? 2 : 1); // skip newline ("\r\n" or "\n\r" counts once)
+         line_number += 1;
+         char_offset = 0;
+      } else {
+         ++p;
+         ++char_offset;
+      }
+   }
+   loc->line_number = line_number;
+   loc->line_offset = char_offset;
+}
+
+// main helper for returning a parsed token: records the token ID and its [start, end] span
+static int stb__clex_token(stb_lexer *lexer, int token, char *start, char *end)
+{
+   lexer->where_firstchar = start;
+   lexer->where_lastchar = end;
+   lexer->parse_point = end+1;   // 'end' is inclusive, so lexing resumes right after it
+   lexer->token = token;
+   return 1;
+}
+
+// helper function for returning eof
+static int stb__clex_eof(stb_lexer *lexer)
+{
+   lexer->token = CLEX_eof;   // the other lexer fields keep whatever the previous token left in them
+   return 0;                  // 0 is what stb_c_lexer_get_token hands back at end of input
+}
+
+static int stb__clex_iswhite(int x)   // 1 for the characters skipped between tokens, else 0
+{
+   return (x == '\n') || (x == '\r') || (x == '\t') || (x == '\f') || (x == ' ');
+}
+
+static const char *stb__strchr(const char *str, int ch)
+{
+   // first occurrence of ch in str, or 0; unlike strchr the terminating 0 is never matched
+   while (*str && *str != ch)
+      ++str;
+   return *str ? str : 0;
+}
+
+// parse suffixes at the end of a number ('cur' = first char after the digits), then emit the token
+static int stb__clex_parse_suffixes(stb_lexer *lexer, long tokenid, char *start, char *cur, const char *suffixes)
+{
+   #ifdef STB__clex_parse_suffixes
+   // letters following the number are copied as-is (no terminator is written) into string storage
+   lexer->string_len = 0;
+   lexer->string = lexer->string_storage;
+   for (; (*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z'); ++cur) {
+      int unknown = stb__strchr(suffixes, *cur) == 0;
+      int no_room = lexer->string_len+1 >= lexer->string_storage_len;
+      if (unknown || no_room)
+         return stb__clex_token(lexer, CLEX_parse_error, start, cur);
+      lexer->string[lexer->string_len++] = *cur;
+   }
+   #else
+   (void) suffixes; // unused when suffix parsing is compiled out
+   #endif
+   return stb__clex_token(lexer, tokenid, start, cur-1);
+}
+
+#ifndef STB__CLEX_use_stdlib
+static double stb__clex_pow(double base, unsigned int exponent)
+{
+   double result = 1;   // base^exponent by repeated squaring, consuming exponent bits low to high
+   while (exponent) {
+      if (exponent & 1) result *= base;
+      base *= base;
+      exponent >>= 1;
+   }
+   return result;
+}
+
+static double stb__clex_parse_float(char *p, char **q)
+{
+   char *s = p;
+   double value=0;
+   int base=10;
+   int exponent=0;
+   // number parser compiled only when STB__CLEX_use_stdlib is not defined: returns the value, *q = end of the literal
+#ifdef STB__clex_hex_floats
+   if (*p == '0') {
+      if (p[1] == 'x' || p[1] == 'X') {
+         base=16;
+         p += 2;
+      }
+   }
+#endif
+
+   for (;;) { // integer part
+      if (*p >= '0' && *p <= '9')
+         value = value*base + (*p++ - '0');
+#ifdef STB__clex_hex_floats
+      else if (base == 16 && *p >= 'a' && *p <= 'f')
+         value = value*base + 10 + (*p++ - 'a');
+      else if (base == 16 && *p >= 'A' && *p <= 'F')
+         value = value*base + 10 + (*p++ - 'A');
+#endif
+      else
+         break;
+   }
+
+   if (*p == '.') { // fractional part: digits accumulate in addend, pow grows by one factor of base per digit
+      double pow, addend = 0;
+      ++p;
+      for (pow=1; ; pow*=base) {
+         if (*p >= '0' && *p <= '9')
+            addend = addend*base + (*p++ - '0');
+#ifdef STB__clex_hex_floats
+         else if (base == 16 && *p >= 'a' && *p <= 'f')
+            addend = addend*base + 10 + (*p++ - 'a');
+         else if (base == 16 && *p >= 'A' && *p <= 'F')
+            addend = addend*base + 10 + (*p++ - 'A');
+#endif
+         else
+            break;
+      }
+      value += addend / pow;
+   }
+#ifdef STB__clex_hex_floats
+   if (base == 16) {
+      // exponent required for hex float literal
+      if (*p != 'p' && *p != 'P') {
+         *q = s; // report "nothing consumed" so the caller can tell the parse failed
+         return 0;
+      }
+      exponent = 1;
+   } else
+#endif
+      exponent = (*p == 'e' || *p == 'E');
+
+   if (exponent) {
+      int sign = p[1] == '-'; // p still points at the exponent letter here
+      unsigned int exponent=0; // shadows the outer flag; from here on it is the exponent's magnitude
+      double power=1;
+      ++p;
+      if (*p == '-' || *p == '+')
+         ++p;
+      while (*p >= '0' && *p <= '9')
+         exponent = exponent*10 + (*p++ - '0');
+
+#ifdef STB__clex_hex_floats
+      if (base == 16)
+         power = stb__clex_pow(2, exponent); // hex floats scale by a power of two
+      else
+#endif
+         power = stb__clex_pow(10, exponent);
+      if (sign)
+         value /= power;
+      else
+         value *= power;
+   }
+   *q = p;
+   return value;
+}
+#endif
+
+static int stb__clex_parse_char(char *p, char **q)
+{
+   // decodes the character at p (one backslash escape or one plain byte); *q is set just past it.
+   // returns -1 for the escapes that are not implemented yet (\x, \X, \u)
+   if (*p == '\\') {
+      int value = -2; // -2: not one of the escapes handled below
+      switch (p[1]) {
+         case 'n': value = '\n'; break;
+         case 't': value = '\t'; break;
+         case 'r': value = '\r'; break;
+         case 'f': value = '\f'; break;
+         case '0': value = '\0'; break; // @TODO octal constants
+         case '\\': case '\'': case '"': value = p[1]; break;
+         case 'x': case 'X': case 'u': value = -1; break; // @TODO hex and unicode constants
+      }
+      if (value != -2) { *q = p+2; return value; }
+   }
+   *q = p+1; // plain byte, or an unhandled escape whose backslash is returned on its own
+   return (unsigned char) *p;
+}
+
+static int stb__clex_parse_string(stb_lexer *lexer, char *p, int type)
+{
+   // p is at the opening quote. On success the unescaped, 0-terminated text is in lexer->string and the
+   // token spans both quotes; a missing closing quote or a too-small string_storage gives CLEX_parse_error
+   char *start = p;
+   char delim = *p++; // grab the " or ' for later matching
+   char *out = lexer->string_storage;
+   char *outend = lexer->string_storage + lexer->string_storage_len;
+   while (p != lexer->eof && *p != delim STB_C_LEX_0_IS_EOF( && *p != 0 )) {
+      int n;
+      if (*p == '\\') {
+         char *q;
+         if (p+1 == lexer->eof) return stb__clex_token(lexer, CLEX_parse_error, start, p); // backslash is the last input char
+         n = stb__clex_parse_char(p, &q);
+         if (n < 0) return stb__clex_token(lexer, CLEX_parse_error, start, q);
+         p = q;
+      } else {
+         n = (unsigned char) *p++; // @OPTIMIZE: could speed this up by looping-while-not-backslash
+      }
+      if (outend - out < 2) return stb__clex_token(lexer, CLEX_parse_error, start, p); // need room for n and the terminator
+      *out++ = (char) n; // @TODO expand unicode escapes to UTF8
+   }
+   if (p == lexer->eof || *p != delim || out >= outend) return stb__clex_token(lexer, CLEX_parse_error, start, p-1); // unterminated, or no room for the 0
+   *out = 0;
+   lexer->string = lexer->string_storage;
+   lexer->string_len = (int) (out - lexer->string_storage);
+   return stb__clex_token(lexer, type, start, p);
+}
+
+int stb_c_lexer_get_token(stb_lexer *lexer)
+{
+   char *p = lexer->parse_point;
+   // returns 1 after storing a token (including CLEX_parse_error) in *lexer, or 0 with CLEX_eof at end of input
+   // skip whitespace and comments
+   for (;;) {
+      #ifdef STB_C_LEX_ISWHITE
+      while (p != lexer->eof) {
+         int n;
+         n = STB_C_LEX_ISWHITE(p);
+         if (n == 0) break;
+         if (lexer->eof && lexer->eof - p < n) // user whitespace run claims more bytes than remain
+            return stb__clex_token(lexer, CLEX_parse_error, p,lexer->eof-1);
+         p += n;
+      }
+      #else
+      while (p != lexer->eof && stb__clex_iswhite(*p))
+         ++p;
+      #endif
+
+      STB_C_LEX_CPP_COMMENTS(
+         if (p != lexer->eof && p+1 != lexer->eof && p[0] == '/' && p[1] == '/') {
+            while (p != lexer->eof && *p != '\r' && *p != '\n')
+               ++p;
+            continue;
+         }
+      )
+
+      STB_C_LEX_C_COMMENTS(
+         if (p != lexer->eof && p+1 != lexer->eof && p[0] == '/' && p[1] == '*') {
+            char *start = p;
+            p += 2;
+            while (p != lexer->eof && p+1 != lexer->eof && (p[0] != '*' || p[1] != '/'))
+               ++p;
+            if (p == lexer->eof || p+1 == lexer->eof)
+               return stb__clex_token(lexer, CLEX_parse_error, start, lexer->eof-1);
+            p += 2;
+            continue;
+         }
+      )
+
+      #ifdef STB__clex_discard_preprocessor
+         // @TODO this discards everything after a '#', regardless
+         // of where in the line the # is, rather than requiring it
+         // be at the start. (because this parser doesn't otherwise
+         // check for line breaks!)
+         if (p != lexer->eof && p[0] == '#') {
+            while (p != lexer->eof && *p != '\r' && *p != '\n')
+               ++p;
+            continue;
+         }
+      #endif
+
+      break;
+   }
+
+   if (p == lexer->eof)
+      return stb__clex_eof(lexer);
+
+   switch (*p) {
+      default:
+         if (   (*p >= 'a' && *p <= 'z')
+             || (*p >= 'A' && *p <= 'Z')
+             || *p == '_' || (unsigned char) *p >= 128    // >= 128 is UTF8 char
+             STB_C_LEX_DOLLAR_IDENTIFIER( || *p == '$' ) )
+         {
+            int n = 0;
+            lexer->string = lexer->string_storage;
+            do {
+               if (n+1 >= lexer->string_storage_len)
+                  return stb__clex_token(lexer, CLEX_parse_error, p, p+n);
+               lexer->string[n] = p[n];
+               ++n;
+            } while (
+                  (p[n] >= 'a' && p[n] <= 'z')
+               || (p[n] >= 'A' && p[n] <= 'Z')
+               || (p[n] >= '0' && p[n] <= '9') // allow digits in middle of identifier
+               || p[n] == '_' || (unsigned char) p[n] >= 128
+                STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' )
+            );
+            lexer->string[n] = 0;
+            lexer->string_len = n;
+            return stb__clex_token(lexer, CLEX_id, p, p+n-1);
+         }
+
+         // check for EOF
+         STB_C_LEX_0_IS_EOF(
+            if (*p == 0)
+               return stb__clex_eof(lexer);
+         )
+
+      single_char:
+         // not an identifier, return the character as itself
+         return stb__clex_token(lexer, *p, p, p);
+
+      case '+':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_INCREMENTS(if (p[1] == '+') return stb__clex_token(lexer, CLEX_plusplus, p,p+1);)
+            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_pluseq  , p,p+1);)
+         }
+         goto single_char;
+      case '-':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_INCREMENTS(if (p[1] == '-') return stb__clex_token(lexer, CLEX_minusminus, p,p+1);)
+            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_minuseq   , p,p+1);)
+            STB_C_LEX_C_ARROW(     if (p[1] == '>') return stb__clex_token(lexer, CLEX_arrow     , p,p+1);)
+         }
+         goto single_char;
+      case '&':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_LOGICAL(  if (p[1] == '&') return stb__clex_token(lexer, CLEX_andand, p,p+1);)
+            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_andeq , p,p+1);)
+         }
+         goto single_char;
+      case '|':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_LOGICAL(  if (p[1] == '|') return stb__clex_token(lexer, CLEX_oror, p,p+1);)
+            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_oreq, p,p+1);)
+         }
+         goto single_char;
+      case '=':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_eq, p,p+1);)
+            STB_C_LEX_EQUAL_ARROW(  if (p[1] == '>') return stb__clex_token(lexer, CLEX_eqarrow, p,p+1);)
+         }
+         goto single_char;
+      case '!':
+         STB_C_LEX_C_COMPARISONS(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_noteq, p,p+1);)
+         goto single_char;
+      case '^':
+         STB_C_LEX_C_BITWISEEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_xoreq, p,p+1));
+         goto single_char;
+      case '%':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_modeq, p,p+1));
+         goto single_char;
+      case '*':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_muleq, p,p+1));
+         goto single_char;
+      case '/':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_diveq, p,p+1));
+         goto single_char;
+      case '<':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_lesseq, p,p+1);)
+            STB_C_LEX_C_SHIFTS(     if (p[1] == '<') {
+                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
+                                                              return stb__clex_token(lexer, CLEX_shleq, p,p+2);)
+                                       return stb__clex_token(lexer, CLEX_shl, p,p+1);
+                                    }
+                              )
+         }
+         goto single_char;
+      case '>':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_greatereq, p,p+1);)
+            STB_C_LEX_C_SHIFTS(     if (p[1] == '>') {
+                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
+                                                              return stb__clex_token(lexer, CLEX_shreq, p,p+2);)
+                                       return stb__clex_token(lexer, CLEX_shr, p,p+1);
+                                    }
+                              )
+         }
+         goto single_char;
+
+      case '"':
+         STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_dqstring);)
+         goto single_char;
+      case '\'':
+         STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_sqstring);)
+         STB_C_LEX_C_CHARS(
+         {
+            char *start = p;
+            lexer->int_number = stb__clex_parse_char(p+1, &p);
+            if (lexer->int_number < 0)
+               return stb__clex_token(lexer, CLEX_parse_error, start,start);
+            if (p == lexer->eof || *p != '\'')
+               return stb__clex_token(lexer, CLEX_parse_error, start,p);
+            return stb__clex_token(lexer, CLEX_charlit, start, p); // p is the closing quote, the token's last char
+         })
+         goto single_char;
+
+      case '0':
+         #if defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
+            if (p+1 != lexer->eof) {
+               if (p[1] == 'x' || p[1] == 'X') {
+                  char *q;
+
+                  #ifdef STB__clex_hex_floats
+                  for (q=p+2;
+                       q != lexer->eof && ((*q >= '0' && *q <= '9') || (*q >= 'a' && *q <= 'f') || (*q >= 'A' && *q <= 'F'));
+                       ++q);
+                  if (q != lexer->eof) {
+                     if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'p' || *q == 'P')) {
+                        #ifdef STB__CLEX_use_stdlib
+                        lexer->real_number = strtod((char *) p, (char**) &q);
+                        #else
+                        lexer->real_number = stb__clex_parse_float(p, &q);
+                        #endif
+
+                        if (p == q)
+                           return stb__clex_token(lexer, CLEX_parse_error, p,q);
+                        return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);
+
+                     }
+                  }
+                  #endif   // STB__CLEX_hex_floats
+
+                  #ifdef STB__clex_hex_ints
+                  #ifdef STB__CLEX_use_stdlib
+                  lexer->int_number = strtol((char *) p, (char **) &q, 16);
+                  #else
+                  {
+                     stb__clex_int n=0;
+                     for (q=p+2; q != lexer->eof; ++q) {
+                        if (*q >= '0' && *q <= '9')
+                           n = n*16 + (*q - '0');
+                        else if (*q >= 'a' && *q <= 'f')
+                           n = n*16 + (*q - 'a') + 10;
+                        else if (*q >= 'A' && *q <= 'F')
+                           n = n*16 + (*q - 'A') + 10;
+                        else
+                           break;
+                     }
+                     lexer->int_number = n;
+                  }
+                  #endif
+                  if (q == p+2) // "0x" with no hex digits: the error spans the prefix itself
+                     return stb__clex_token(lexer, CLEX_parse_error, p,p+1);
+                  return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_HEX_SUFFIXES);
+                  #endif
+               }
+            }
+         #endif // defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
+         // can't test for octal because we might parse '0.0' as float or as '0' '.' '0',
+         // so have to do float first
+
+         /* FALL THROUGH */
+      case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+
+         #ifdef STB__clex_decimal_floats
+         {
+            char *q = p;
+            while (q != lexer->eof && (*q >= '0' && *q <= '9'))
+               ++q;
+            if (q != lexer->eof) {
+               if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'e' || *q == 'E')) {
+                  #ifdef STB__CLEX_use_stdlib
+                  lexer->real_number = strtod((char *) p, (char**) &q);
+                  #else
+                  lexer->real_number = stb__clex_parse_float(p, &q);
+                  #endif
+
+                  return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);
+
+               }
+            }
+         }
+         #endif // STB__clex_decimal_floats
+
+         #ifdef STB__clex_octal_ints
+         if (p[0] == '0') {
+            char *q = p;
+            #ifdef STB__CLEX_use_stdlib
+            lexer->int_number = strtol((char *) p, (char **) &q, 8);
+            #else
+            stb__clex_int n=0;
+            while (q != lexer->eof) {
+               if (*q >= '0' && *q <= '7')
+                  n = n*8 + (*q - '0');
+               else
+                  break;
+               ++q;
+            }
+            if (q != lexer->eof && (*q == '8' || *q=='9'))
+               return stb__clex_token(lexer, CLEX_parse_error, p, q);
+            lexer->int_number = n;
+            #endif
+            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES);
+         }
+         #endif // STB__clex_octal_ints
+
+         #ifdef STB__clex_decimal_ints
+         {
+            char *q = p;
+            #ifdef STB__CLEX_use_stdlib
+            lexer->int_number = strtol((char *) p, (char **) &q, 10);
+            #else
+            stb__clex_int n=0;
+            while (q != lexer->eof) {
+               if (*q >= '0' && *q <= '9')
+                  n = n*10 + (*q - '0');
+               else
+                  break;
+               ++q;
+            }
+            lexer->int_number = n;
+            #endif
+            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_DECIMAL_SUFFIXES);
+         }
+         #endif // STB__clex_decimal_ints
+         goto single_char;
+   }
+}
+#endif // STB_C_LEXER_IMPLEMENTATION
+
+#ifdef STB_C_LEXER_SELF_TEST
+#define _CRT_SECURE_NO_WARNINGS
+#include <stdio.h>
+#include <stdlib.h>
+
+static void print_token(stb_lexer *lexer)
+{
+   switch (lexer->token) { // self-test output: one printf per token kind; identifiers get a '_' prefix, ints a '#'
+      case CLEX_id        : printf("_%s", lexer->string); break;
+      case CLEX_eq        : printf("=="); break;
+      case CLEX_noteq     : printf("!="); break;
+      case CLEX_lesseq    : printf("<="); break;
+      case CLEX_greatereq : printf(">="); break;
+      case CLEX_andand    : printf("&&"); break;
+      case CLEX_oror      : printf("||"); break;
+      case CLEX_shl       : printf("<<"); break;
+      case CLEX_shr       : printf(">>"); break;
+      case CLEX_plusplus  : printf("++"); break;
+      case CLEX_minusminus: printf("--"); break;
+      case CLEX_arrow     : printf("->"); break;
+      case CLEX_andeq     : printf("&="); break;
+      case CLEX_oreq      : printf("|="); break;
+      case CLEX_xoreq     : printf("^="); break;
+      case CLEX_pluseq    : printf("+="); break;
+      case CLEX_minuseq   : printf("-="); break;
+      case CLEX_muleq     : printf("*="); break;
+      case CLEX_diveq     : printf("/="); break;
+      case CLEX_modeq     : printf("%%="); break;
+      case CLEX_shleq     : printf("<<="); break;
+      case CLEX_shreq     : printf(">>="); break;
+      case CLEX_eqarrow   : printf("=>"); break;
+      case CLEX_dqstring  : printf("\"%s\"", lexer->string); break;
+      case CLEX_sqstring  : printf("'\"%s\"'", lexer->string); break;
+      case CLEX_charlit   : printf("'%c'", (int) lexer->int_number); break; // char literals are stored in int_number; lexer->string is not set for them
+      #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
+      case CLEX_intlit    : printf("#%g", lexer->real_number); break;
+      #else
+      case CLEX_intlit    : printf("#%ld", lexer->int_number); break;
+      #endif
+      case CLEX_floatlit  : printf("%g", lexer->real_number); break;
+      default:
+         if (lexer->token >= 0 && lexer->token < 256)
+            printf("%c", (int) lexer->token);
+         else {
+            printf("<<<UNKNOWN TOKEN %ld >>>\n", lexer->token);
+         }
+         break;
+   }
+}
+
+/* Force a test
+of parsing
+multiline comments */
+
+/*/ comment /*/
+/**/ extern /**/
+
+void dummy(void)
+{
+   double some_floats[] = {
+      1.0501, -10.4e12, 5E+10,
+#if 0   // not supported in C++ or C-pre-99, so don't try to compile it, but let our parser test it
+      0x1.0p+24, 0xff.FP-8, 0x1p-23,
+#endif
+      4.
+   };
+   (void) sizeof(some_floats);
+   (void) some_floats[1];
+   // main() below never calls this function; it lexes stb_c_lexer.h, so this text is presumably here as lexer input
+   printf("test %d",1); // https://github.com/nothings/stb/issues/13
+}
+
+int main(int argc, char **argv)
+{
+   FILE *f = fopen("stb_c_lexer.h","rb");
+   char *text = (char *) malloc(1 << 20);
+   int len = (f && text) ? (int) fread(text, 1, 1<<20, f) : -1; // -1 when the file or the buffer is missing
+   stb_lexer lex;
+   if (len < 0) {
+      fprintf(stderr, "Error opening file\n");
+      free(text);
+      if (f) fclose(f); // f is NULL when fopen failed, and fclose(NULL) is undefined behavior
+      return 1;
+   }
+   fclose(f);
+   // self-test: lex this header itself and echo every token until EOF or the first parse error
+   stb_c_lexer_init(&lex, text, text+len, (char *) malloc(0x10000), 0x10000);
+   while (stb_c_lexer_get_token(&lex)) {
+      if (lex.token == CLEX_parse_error) {
+         printf("\n<<<PARSE ERROR>>>\n");
+         break;
+      }
+      print_token(&lex);
+      printf("  ");
+   }
+   return 0;
+}
+#endif
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/include/extern/uuid.h b/include/extern/uuid.h
new file mode 100644
index 0000000..0518ad7
--- /dev/null
+++ b/include/extern/uuid.h
@@ -0,0 +1,318 @@
+/*
+    Single-file, STB-style, library to generate UUID:s. No dependencies
+    except for OS-provided functionality.
+
+    version 0.1, August, 2016
+
+    Copyright (C) 2016- Fredrik Kihlander
+
+    This software is provided 'as-is', without any express or implied
+    warranty.  In no event will the authors be held liable for any damages
+    arising from the use of this software.
+
+    Permission is granted to anyone to use this software for any purpose,
+    including commercial applications, and to alter it and redistribute it
+    freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+       claim that you wrote the original software. If you use this software
+       in a product, an acknowledgment in the product documentation would be
+       appreciated but is not required.
+    2. Altered source versions must be plainly marked as such, and must not be
+       misrepresented as being the original software.
+    3. This notice may not be removed or altered from any source distribution.
+
+    Fredrik Kihlander
+*/
+
+#ifndef UUID_H_INCLUDED
+#define UUID_H_INCLUDED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct uuid { unsigned char bytes[16]; } uuid; /* 128-bit UUID stored as 16 raw bytes. Altered from upstream: typedef added so that plain 'uuid' names the type in C as well as in C++. */
+
+/**
+ * Set uuid to the null_uuid.
+ */
+void uuid0_generate( uuid* res );
+
+/**
+ * Generate an uuid of version 4 ( Random ) into res.
+ * @note res will be the null_uuid on failure.
+ */
+void uuid4_generate( uuid* res );
+
+/**
+ * Return the type of the provided uuid.
+ *
+ * @return 0 if it is the null-uuid
+ *         1 MAC address & date-time
+ *         2 DCE Security
+ *         3 MD5 hash & namespace
+ *         4 Random
+ *         5 SHA-1 hash & namespace
+ *
+ *         -1 on an invalid uuid.
+ */
+int uuid_type( uuid* id );
+
+/**
+ * Converts an uuid to string.
+ * @param id uuid to convert.
+ * @param out pointer to char-buffer where to write uuid, uuid is NOT 0-terminated
+ *            and is expected to be at least 36 bytes.
+ * @return out
+ */
+char* uuid_to_string( uuid* id, char* out );
+
+/**
+ * Convert a string to an uuid.
+ * @param str to convert.
+ * @param out uuid to parse to.
+ * @return true on success.
+ */
+bool uuid_from_string( const char* str, uuid* out );
+
+/**
+ * Copy uuid from src to dst.
+ */
+void uuid_copy( const uuid* src, uuid* dst );
+
+#ifdef __cplusplus
+}
+
+struct _uuid_to_str
+{
+	char str[37];
+	_uuid_to_str( uuid* id )
+	{
+		uuid_to_string( id, str );
+		str[36] = '\0';
+	}
+};
+
+/**
+ * Helper macro to convert uuid to string.
+ */
+#define UUID_TO_STRING( id ) _uuid_to_str( id ).str
+
+#endif // __cplusplus
+
+#if defined(UUID_IMPLEMENTATION)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <ctype.h>
+#include <string.h>
+
+#if defined(__LINUX__) || defined(__linux__) || defined(__ANDROID__)
+#  include <stdio.h>
+#endif
+
+#if defined( _MSC_VER )
+#  include <Objbase.h>
+#  pragma comment(lib, "Ole32.lib")
+#endif
+
+#if defined( __APPLE__ )
+#include <CoreFoundation/CFUUID.h>
+#endif
+
+char* uuid_to_string( uuid* id, char* out ) // Writes id as 36 lowercase-hex characters in 8-4-4-4-12 groups into out and returns out; no NUL terminator is written.
+{
+	static const char TOHEXCHAR[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; // nibble value -> hex digit
+
+	char* c = out; // write cursor into the caller's buffer
+	int src_byte = 0; // index of the next uuid byte to emit
+	for( int i = 0; i < 4; ++i ) // group 1: bytes 0-3 -> 8 hex digits
+	{
+		*c++ = TOHEXCHAR[ ( id->bytes[src_byte] >> 4 ) & 0xF ]; // high nibble first
+		*c++ = TOHEXCHAR[ id->bytes[src_byte] & 0xF ]; // then low nibble
+		++src_byte;
+	}
+	*c++ = '-';
+
+	for( int i = 0; i < 2; ++i ) // group 2: bytes 4-5 -> 4 hex digits
+	{
+		*c++ = TOHEXCHAR[ ( id->bytes[src_byte] >> 4 ) & 0xF ];
+		*c++ = TOHEXCHAR[ id->bytes[src_byte] & 0xF ];
+		++src_byte;
+	}
+	*c++ = '-';
+
+	for( int i = 0; i < 2; ++i ) // group 3: bytes 6-7 -> 4 hex digits
+	{
+		*c++ = TOHEXCHAR[ ( id->bytes[src_byte] >> 4 ) & 0xF ];
+		*c++ = TOHEXCHAR[ id->bytes[src_byte] & 0xF ];
+		++src_byte;
+	}
+	*c++ = '-';
+
+	for( int i = 0; i < 2; ++i ) // group 4: bytes 8-9 -> 4 hex digits
+	{
+		*c++ = TOHEXCHAR[ ( id->bytes[src_byte] >> 4 ) & 0xF ];
+		*c++ = TOHEXCHAR[ id->bytes[src_byte] & 0xF ];
+		++src_byte;
+	}
+	*c++ = '-';
+
+	for( int i = 0; i < 6; ++i ) // group 5: bytes 10-15 -> 12 hex digits
+	{
+		*c++ = TOHEXCHAR[ ( id->bytes[src_byte] >> 4 ) & 0xF ];
+		*c++ = TOHEXCHAR[ id->bytes[src_byte] & 0xF ];
+		++src_byte;
+	}
+
+	return out; // exactly 36 characters were written; the caller must add '\0' if a C string is needed
+}
+
+bool uuid_from_string( const char* str, uuid* out ) // Parses "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" (either hex case) into out; returns false and leaves out untouched on malformed input.
+{
+	char uuid_str[32]; // the 32 hex digits with the dashes removed, lowercased
+	char* outc = uuid_str;
+	for( int i = 0; i < 36; ++i ) // a premature NUL fails both checks below, so a short string is rejected before reading past its end
+	{
+		char c = str[i];
+		if( i == 8 || i == 13 || i == 18 || i == 23 ) // positions of the four group separators
+		{
+			if( c != '-' )
+				return false;
+		}
+		else
+		{
+			if( !isxdigit( (unsigned char)c ) ) // <ctype.h> functions are undefined for negative char values, so convert to unsigned char first
+				return false;
+			*outc = (char)tolower( (unsigned char)c );
+			++outc;
+		}
+	}
+
+#define UUID_HEXCHRTO_DEC( c ) (unsigned char)( (c) <= '9' ? ( (c) - '0' ) : 10 + (c) - ( (c) <= 'F' ? 'A' : 'a' ) )
+
+	for( int byte = 0; byte < 16; ++byte ) // out is written only after the whole string has been validated
+	{
+		unsigned char v1 = UUID_HEXCHRTO_DEC( uuid_str[ byte * 2 ] ); // high nibble
+		unsigned char v2 = UUID_HEXCHRTO_DEC( uuid_str[ byte * 2 + 1 ] ); // low nibble
+		out->bytes[byte] = (unsigned char)(( v1 << 4 ) | v2);
+	}
+#undef UUID_HEXCHRTO_DEC
+
+	return true;
+}
+
+void uuid0_generate( uuid* res ) // Resets *res to the null uuid: every byte is set to zero.
+{
+	memset( res, 0, sizeof *res );
+}
+
+void uuid4_generate( uuid* res ) // Fills *res with an OS-provided uuid; *res stays the null uuid whenever a platform call fails.
+{
+	uuid0_generate( res ); // start from the null uuid so every early return below leaves a defined value
+
+#if defined(__LINUX__) || defined(__linux__) || defined(__ANDROID__)
+	FILE* f = fopen( "/proc/sys/kernel/random/uuid", "rb" ); // the uuid is read from this file as text
+	if( f == 0x0 )
+		return;
+
+	char uuid_str[36]; // exactly the 36 characters uuid_from_string consumes; not NUL-terminated
+	size_t read = fread( uuid_str, 1, sizeof( uuid_str ), f );
+	fclose(f);
+	if( read != 36 )
+		return;
+	uuid_from_string( uuid_str, res ); // result ignored: on a parse failure res is not written and remains the null uuid
+#elif defined(_MSC_VER)
+	GUID g;
+	HRESULT hres = CoCreateGuid( &g );
+	if( hres != S_OK )
+		return;
+	// Reverse the byte order of Data1/Data2/Data3 so the GUID can be memcpy'd as 16 uuid bytes. NOTE(review): the original comment called this a swap "to little endian" - confirm the intended byte order.
+	g.Data1 = ( ( g.Data1 & 0x00FF ) << 24 ) | ( ( g.Data1 & 0xFF00 ) << 8) | ( ( g.Data1 >> 8 ) & 0xFF00 ) | ( ( g.Data1 >> 24 ) & 0x00FF );
+	g.Data2 = (WORD)( ( ( g.Data2 & 0x00FF ) << 8 ) | ( ( g.Data2 & 0xFF00 ) >> 8 ) );
+	g.Data3 = (WORD)( ( ( g.Data3 & 0x00FF ) << 8 ) | ( ( g.Data3 & 0xFF00 ) >> 8 ) );
+	memcpy( res->bytes, &g, sizeof( res->bytes ) );
+#elif defined( __APPLE__ )
+	CFUUIDRef new_uuid = CFUUIDCreate(0x0);
+	CFUUIDBytes bytes = CFUUIDGetUUIDBytes( new_uuid );
+
+	res->bytes[0] = bytes.byte0; // CFUUIDBytes exposes named fields rather than an array, hence the field-by-field copy
+	res->bytes[1] = bytes.byte1;
+	res->bytes[2] = bytes.byte2;
+	res->bytes[3] = bytes.byte3;
+	res->bytes[4] = bytes.byte4;
+	res->bytes[5] = bytes.byte5;
+	res->bytes[6] = bytes.byte6;
+	res->bytes[7] = bytes.byte7;
+	res->bytes[8] = bytes.byte8;
+	res->bytes[9] = bytes.byte9;
+	res->bytes[10] = bytes.byte10;
+	res->bytes[11] = bytes.byte11;
+	res->bytes[12] = bytes.byte12;
+	res->bytes[13] = bytes.byte13;
+	res->bytes[14] = bytes.byte14;
+	res->bytes[15] = bytes.byte15;
+	CFRelease ( new_uuid ); // release the object obtained from CFUUIDCreate
+#else
+#  error "unhandled platform"
+#endif
+}
+
+int uuid_type( uuid* id ) // Returns the uuid version (0 for the null uuid, 1-5 otherwise) or -1 if the uuid is invalid.
+{
+	switch( ( id->bytes[6] & 0xF0 ) >> 4 ) // version = high nibble of byte 6
+	{
+		case 0: // version 0 is only valid for the all-zero null uuid
+			for( int i = 0; i < 16; ++i )
+				if( id->bytes[i] != 0 )
+					return -1;
+			return 0;
+		case 1:
+			return 1;
+		case 2:
+			return 2;
+		case 3:
+			switch( ( id->bytes[8] & 0xF0 ) >> 4 ) // high nibble of byte 8 must be 8..11
+			{
+				case 8:
+				case 9:
+				case 10:
+				case 11:
+					return 3; // fixed: previously returned 4 (copy-paste from the version-4 case); the header documents 3 for MD5 hash & namespace
+				default:
+					return -1;
+			}
+			break;
+		case 4:
+			switch( ( id->bytes[8] & 0xF0 ) >> 4 ) // same byte-8 check as for version 3
+			{
+				case 8:
+				case 9:
+				case 10:
+				case 11:
+					return 4;
+				default:
+					return -1;
+			}
+			break;
+		case 5:
+			return 5;
+	}
+	return -1; // any other version nibble is rejected
+}
+
+void uuid_copy( const uuid* src, uuid* dst ) // Copies the whole uuid object from *src into *dst.
+{
+	memcpy( dst, src, sizeof *dst );
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // UUID_IMPLEMENTATION
+
+#endif // UUID_H_INCLUDED
diff --git a/include/item.h b/include/item.h
new file mode 100644
index 0000000..bffbee3
--- /dev/null
+++ b/include/item.h
@@ -0,0 +1,35 @@
+#ifndef ITEM_H
+# define ITEM_H
+
+#include <core.h>
+
+typedef enum { // Item kinds; the enumerators take the default sequential values
+	ITEM_EQUIPABLE, // = 0
+	ITEM_CONSUMABLE, // = 1
+	ITEM_PLACEABLE, // = 2
+	ITEM_QUEST, // = 3
+}	ITEM_FLAGS; // NOTE(review): these are not distinct bits, so they cannot be OR-ed together as flags as written - confirm how Item.flags is meant to use them
+
+typedef struct {
+	int	number;
+	int	description;//ref to item description table
+	int	name;//ref to item name table
+	int	rarity;
+	int	flags;
+	int	stack;
+}	Item;
+
+/*
+The inventory could be a grid with one focused item;
+the focused item has a description and stats window on the left.
+The focused item is either the one under the mouse
+or can be chosen using the arrow keys (easier controller support).
+*/
+typedef struct {
+	int		size;
+	int		capacity;
+	Item	storage[100];
+}	Inventory;
+
+
+#endif
diff --git a/include/player.h b/include/player.h
new file mode 100644
index 0000000..3a10fbb
--- /dev/null
+++ b/include/player.h
@@ -0,0 +1,38 @@
+#ifndef PLAYER_H
+# define PLAYER_H
+
+#include <core.h>
+#include <item.h>
+#include <entity.h>
+#include <skill.h>
+
+typedef enum { // Player action identifiers, numbered sequentially from 0
+	ACTION_FORWARD,
+	ACTION_BACKWARD,
+	ACTION_LEFT,
+	ACTION_RIGHT,
+	ACTION_PRIMARY,
+	ACTION_SECONDARY,
+	ACTION_USE,
+	ACTION_GRAB,
+	ACTION_JUMP,
+	ACTION_DASH,
+	ACTION_SKILL1,
+	ACTION_TOOLBAR1, // last entry: value 11
+}	PLAYER_ACTION; // 12 actions in total. NOTE(review): Input.key in engine.h is declared as int key[4] with a comment saying it should match this count - confirm
+
+typedef struct {
+	char	*name;
+	char	*gender;
+	char	*nickname;//can be changed, what npc refers to when talking to you, also npc do not know you until you interact with them
+}	Identity;
+
+typedef struct {
+	Entity		entity;
+	Identity	identity;
+	Skill		skills[10];
+	Inventory	inventory;
+}	Player;
+//give more emphasis to the character than to the item used (~60%, ~30%)
+
+#endif
diff --git a/include/quest.h b/include/quest.h
new file mode 100644
index 0000000..dbe3df1
--- /dev/null
+++ b/include/quest.h
@@ -0,0 +1,23 @@
+#ifndef QUEST_H
+# define QUEST_H
+
+// Quest & Task
+typedef struct { // One quest record; every field is a plain int
+	int	id;
+	int	prerequisite;
+	int	objectiv;
+	int	status;// unknown, known, accepted, achieved, completed, unavailable. NOTE(review): this list does not match the QUEST_STATE enumerators - confirm which set applies
+}	Quest;
+
+typedef enum { // Quest state constants, numbered sequentially from 0
+	QUEST_SUCCESS,
+	QUEST_FAILURE,
+	QUEST_TAKEN,
+	QUEST_KNOW, // presumably "known" - TODO confirm
+	QUEST_UNKNOW, // presumably "unknown" - TODO confirm
+	QUEST_COMPLETE,
+}	QUEST_STATE; // NOTE(review): Quest.status is a plain int and nothing in this header ties it to this enum - confirm
+
+//Quests are missives or scrolls that the player can open to read their details, with no minimap or markers. The map should likewise be a separate hand-drawn scroll on which the player can place markers and orient himself using landscape points of interest.
+
+#endif
diff --git a/include/skill.h b/include/skill.h
new file mode 100644
index 0000000..13bfaa1
--- /dev/null
+++ b/include/skill.h
@@ -0,0 +1,19 @@
+# ifndef SKILL_H
+#  define SKILL_H
+
+typedef enum {
+	SKILL_NONE,
+	SKILL_PASSIF,
+	SKILL_MASTERY,
+	SKILL_ACTIF,
+	SKILL_LABOR,
+	SKILL_COMBAT,
+	SKILL_KNOWLEDGE,
+	SKILL_PROGRESSION,
+	SKILL_INATE,
+}	SKILL_TYPE;
+
+typedef struct { // NOTE(review): a struct with no members is not valid ISO C (compilers accept it only as an extension) - add fields or a placeholder member before relying on sizeof(Skill)
+}	Skill;
+
+#endif
diff --git a/include/struct.h b/include/struct.h
deleted file mode 100644
index 42a7fa9..0000000
--- a/include/struct.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef ENGINE_STRUCT
-# define ENGINE_STRUCT
-
-#include <core.h>
-#include <enum.h>
-
-typedef struct {
-	Font fonts[4];//
-	Texture	textures;//
-	Model	models;//
-	Sound	sound;
-}	Assets;
-
-// Event
-typedef struct {
-	EVENT_TYPE	type;
-	int			value;
-	double		time;
-	char*		info;
-	int			initiator;
-	int			actors;
-	int			actions;
-}	Event;
-
-//cities
-//hideout
-//tiles
-//level
-//npc
-//clan
-//army
-//team
-
-typedef struct {
-
-}	Damage;
-
-typedef struct {
-	EVENT_TYPE		trigger;
-	ACTIVITY_STATE	required_state;
-	bool			(*condition)(int e, Event ev);
-	void			*(*effect)(int e, Event ev);
-}	virtualRule;
-
-typedef struct {
-	EVENT_TYPE		trigger;
-	ACTIVITY_STATE	required_state;
-	bool			(*condition)(int e, Event ev);
-	void			*(*effect)(int e, Event ev);
-}	physicalRule;
-
-typedef struct {
-	int strenght;
-	int agility;
-	int toughness;
-	int	proprioception;
-	int	earing;
-	int	touch;
-	int	eyesight;
-}	BodyStats;
-
-typedef struct {
-	int	intellect;
-	int fortitude;
-	int charisma;
-	int eloquence;
-	int perception;
-}	MentalStats;
-
-typedef struct {
-	LIMB_TYPE	type;
-	LIMB_STATE	state;
-}	Limb;
-
-typedef struct {
-	Limb		limbs[10];
-	BodyStats	body_stats;
-	MentalStats	mental_stats;
-	int			mass;
-	Vector3		pos;
-	Vector3		velocity;
-	int			health;
-}	Body;
-
-typedef struct {
-	Body			body;
-	int				faction;
-	ENTITY_STATE	state;//can be multiple flag
-}	Entity;
-
-typedef struct {
-	Entity entity;
-	void	(*ai)(void);
-}	Mob;
-
-// Item
-
-typedef struct {
-	int	number;
-	int	description;//ref to item description table
-	int	name;//ref to item name table
-	int	rarity;
-	int	flags;
-	int	stack;
-}	Item;
-
-/*
-inventory could be a grid that have an item focused,
-the item focused have a description and stats windows on the left,
-the focused item is either the one under the mouse
-or can be choosen using arrow keys (easier controller support)
-*/
-typedef struct {
-	int		size;
-	int		capacity;
-	Item	storage[100];
-}	Inventory;
-
-// Player
-
-typedef struct {
-	char	*name;
-	char	*gender;
-	char	*nickname;//can be changed, what npc refers to when talking to you, also npc do not know you until you interact with them
-}	Identity;
-
-typedef struct {
-}	Skill;
-
-typedef struct {
-	Entity		entity;
-	Identity	identity;
-	Skill		skills[10];
-	Inventory	inventory;
-}	Player;
-//give more enphasis on the character than the item used (~60% ,~30%)
-
-// Quest & Task
-typedef struct {
-	int	id;
-	int	prerequisite;
-	int	objectiv;
-	int	status;//unknow, known, accepted, achieved, completed, unavailable
-}	Quest;
-
-//Quest are missive or Scroll that the player can open with info on it, but no mini map or marker Also the map should be another scroll that is handdrawn, and the player can put marker and orient himself using landscape point of interest
-
-// Other Struct
-
-typedef struct {
-	int		key[4];//should be the number of action in PLAYER_ACTION
-	union {
-		int		key;
-		int		pad;
-		int		mouse;
-	}	press;
-	Vector2	mouse_pos;
-	Vector2	mouse_delta;
-	//int		mouse_pressed[2];
-}	Input;
-
-typedef struct {
-	int state;
-	Player player;
-	Assets assets;
-}	Context;
-
-# endif
diff --git a/tools/entity_creator.c b/other/entity_creator.c
similarity index 100%
rename from tools/entity_creator.c
rename to other/entity_creator.c
diff --git a/games/gamemode.c b/other/gamemode.c
similarity index 100%
rename from games/gamemode.c
rename to other/gamemode.c
diff --git a/tools/item_creator.c b/other/item_creator.c
similarity index 100%
rename from tools/item_creator.c
rename to other/item_creator.c
diff --git a/source/worker_old.md b/other/worker_old.md
similarity index 100%
rename from source/worker_old.md
rename to other/worker_old.md
diff --git a/source/core_data.c b/source/core_data.c
index d22bbc4..cc6e238 100644
--- a/source/core_data.c
+++ b/source/core_data.c
@@ -1,8 +1,8 @@
 #include <core.h>
 #define QUEUE_IMPLEMENTATION
-#include <queue.h>
+#include <data_struct/queue.h>
 //#define OCTREE_IMPLEMENTATION
-//#include <octree.h>
+//#include <data_struct/octree.h>
 
 // Frame Allocator
 __thread static char*	frame_buffer = 0x00;
diff --git a/source/core_entity.c b/source/core_entity.c
index edbf3a2..78337dc 100644
--- a/source/core_entity.c
+++ b/source/core_entity.c
@@ -1,5 +1,5 @@
 #include <engine.h>
-#include <queue.h>
+#include <data_struct/queue.h>
 
 #define MAX_ENTITIES 4096
 
diff --git a/source/core_event.c b/source/core_event.c
index 0d15cb1..0dffd95 100644
--- a/source/core_event.c
+++ b/source/core_event.c
@@ -1,5 +1,5 @@
 #include <engine.h>
-#include <queue.h>
+#include <data_struct/queue.h>
 //should have a event structure, and a system can subscribe to an event, then whenever the event is triggered by a system, subscriber system should receive a signal.
 
 typedef struct {
diff --git a/source/layout/gui_DebugTerminal.h b/source/layout/gui_DebugTerminal.h
new file mode 100644
index 0000000..f5bc249
--- /dev/null
+++ b/source/layout/gui_DebugTerminal.h
@@ -0,0 +1,150 @@
+/*******************************************************************************************
+*
+*   DebugTerminal v1.0.0 - Tool Description
+*
+*   MODULE USAGE:
+*       #define GUI_DEBUGTERMINAL_IMPLEMENTATION
+*       #include "gui_DebugTerminal.h"
+*
+*       INIT: GuiDebugTerminalState state = InitGuiDebugTerminal();
+*       DRAW: GuiDebugTerminal(&state);
+*
+*   LICENSE: Proprietary License
+*
+*   Copyright (c) 2022 SleepeeSoftware. All Rights Reserved.
+*
+*   Unauthorized copying of this file, via any medium is strictly prohibited
+*   This project is proprietary and confidential unless the owner allows
+*   usage in any other form by expressly written permission.
+*
+**********************************************************************************************/
+
+#include "raylib.h"
+
+// WARNING: raygui implementation is expected to be defined before including this header
+#undef RAYGUI_IMPLEMENTATION
+#include "raygui.h"
+
+#include <string.h>     // Required for: strcpy()
+
+#ifndef GUI_DEBUGTERMINAL_H
+#define GUI_DEBUGTERMINAL_H
+
+typedef struct {
+    // Define anchors
+    Vector2 anchor01;            // ANCHOR ID:1
+    
+    // Define controls variables
+    bool WindowBox000Active;            // WindowBox: WindowBox000
+    Rectangle TerminalOutputPanelScrollView;
+    Vector2 TerminalOutputPanelScrollOffset;
+    Vector2 TerminalOutputPanelBoundsOffset;            // ScrollPanel: TerminalOutputPanel
+    bool TerminalInputBoxEditMode;
+    char TerminalInputBoxText[128];            // TextBox: TerminalInputBox
+
+    // Define rectangles
+    Rectangle layoutRecs[4];
+
+    // Custom state variables (depend on development software)
+    // NOTE: These variables should be added manually if required
+
+} GuiDebugTerminalState;
+
+#ifdef __cplusplus
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+// ...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+GuiDebugTerminalState InitGuiDebugTerminal(void);
+void GuiDebugTerminal(GuiDebugTerminalState *state);
+static void Button003();                // Button: Button003 logic
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUI_DEBUGTERMINAL_H
+
+/***********************************************************************************
+*
+*   GUI_DEBUGTERMINAL IMPLEMENTATION
+*
+************************************************************************************/
+#if defined(GUI_DEBUGTERMINAL_IMPLEMENTATION)
+
+#include "raygui.h"
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Internal Module Functions Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+GuiDebugTerminalState InitGuiDebugTerminal(void)
+{
+    GuiDebugTerminalState state = { 0 };
+
+    // Init anchors
+    state.anchor01 = (Vector2){ 48, 24 };            // ANCHOR ID:1
+    
+    // Initialize controls variables
+    state.WindowBox000Active = true;            // WindowBox: WindowBox000
+    state.TerminalOutputPanelScrollView = (Rectangle){ 0, 0, 0, 0 };
+    state.TerminalOutputPanelScrollOffset = (Vector2){ 0, 0 };
+    state.TerminalOutputPanelBoundsOffset = (Vector2){ 0, 0 };            // ScrollPanel: TerminalOutputPanel
+    state.TerminalInputBoxEditMode = false;
+    strcpy(state.TerminalInputBoxText, "");            // TextBox: TerminalInputBox
+
+    // Init controls rectangles
+    state.layoutRecs[0] = (Rectangle){ state.anchor01.x + 0, state.anchor01.y + 0, 984, 624 };// WindowBox: WindowBox000
+    state.layoutRecs[1] = (Rectangle){ state.anchor01.x + 24, state.anchor01.y + 48, 936, 448 };// ScrollPanel: TerminalOutputPanel
+    state.layoutRecs[2] = (Rectangle){ state.anchor01.x + 24, state.anchor01.y + 528, 840, 24 };// TextBox: TerminalInputBox
+    state.layoutRecs[3] = (Rectangle){ state.anchor01.x + 840, state.anchor01.y + 576, 96, 24 };// Button: Button003
+
+    // Custom variables initialization
+
+    return state;
+}
+// Button: Button003 logic
+static void Button003()
+{
+    // TODO: Implement control logic
+}
+
+
+void GuiDebugTerminal(GuiDebugTerminalState *state) // Draws the debug-terminal controls from *state and writes the control results back into it
+{
+    // Constant captions for the controls
+    const char *WindowBox000Text = "Debug Terminal";    // WINDOWBOX: WindowBox000
+    const char *Button003Text = "Submit";    // BUTTON: Button003
+    
+    // Draw controls; nothing is drawn once WindowBox000Active is false
+    if (state->WindowBox000Active)
+    {
+        state->WindowBox000Active = !GuiWindowBox(state->layoutRecs[0], WindowBox000Text); // window is deactivated when GuiWindowBox returns non-zero
+        GuiScrollPanel((Rectangle){state->layoutRecs[1].x, state->layoutRecs[1].y, state->layoutRecs[1].width - state->TerminalOutputPanelBoundsOffset.x, state->layoutRecs[1].height - state->TerminalOutputPanelBoundsOffset.y }, NULL, state->layoutRecs[1], &state->TerminalOutputPanelScrollOffset, &state->TerminalOutputPanelScrollView); // fixed: the generated code passed an undeclared TerminalOutputPanelText; the panel has no caption, so pass NULL
+        if (GuiTextBox(state->layoutRecs[2], state->TerminalInputBoxText, 128, state->TerminalInputBoxEditMode)) state->TerminalInputBoxEditMode = !state->TerminalInputBoxEditMode; // 128 matches the size of TerminalInputBoxText
+        if (GuiButton(state->layoutRecs[3], Button003Text)) Button003(); 
+    }
+}
+
+#endif // GUI_DEBUGTERMINAL_IMPLEMENTATION
diff --git a/source/layout/gui_LoadingScreen.h b/source/layout/gui_LoadingScreen.h
new file mode 100644
index 0000000..ad5f6f9
--- /dev/null
+++ b/source/layout/gui_LoadingScreen.h
@@ -0,0 +1,139 @@
+/*******************************************************************************************
+*
+*   LoadingScreen v1.0.0 - Tool Description
+*
+*   MODULE USAGE:
+*       #define GUI_LOADINGSCREEN_IMPLEMENTATION
+*       #include "gui_LoadingScreen.h"
+*
+*       INIT: GuiLoadingScreenState state = InitGuiLoadingScreen();
+*       DRAW: GuiLoadingScreen(&state);
+*
+*   LICENSE: Proprietary License
+*
+*   Copyright (c) 2022 SleepeeSoftware. All Rights Reserved.
+*
+*   Unauthorized copying of this file, via any medium is strictly prohibited
+*   This project is proprietary and confidential unless the owner allows
+*   usage in any other form by expressly written permission.
+*
+**********************************************************************************************/
+
+#include "raylib.h"
+
+// WARNING: raygui implementation is expected to be defined before including this header
+#undef RAYGUI_IMPLEMENTATION
+#include "raygui.h"
+
+#include <string.h>     // Required for: strcpy()
+
+#ifndef GUI_LOADINGSCREEN_H
+#define GUI_LOADINGSCREEN_H
+
+typedef struct {
+    // Define anchors
+    Vector2 anchor01;            // ANCHOR ID:1
+    
+    // Define controls variables
+    float ProgressBar000Value;            // ProgressBar: ProgressBar000
+    Rectangle ScrollPanel002ScrollView;
+    Vector2 ScrollPanel002ScrollOffset;
+    Vector2 ScrollPanel002BoundsOffset;            // ScrollPanel: ScrollPanel002
+
+    // Define rectangles
+    Rectangle layoutRecs[4];
+
+    // Custom state variables (depend on development software)
+    // NOTE: These variables should be added manually if required
+
+} GuiLoadingScreenState;
+
+#ifdef __cplusplus
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+// ...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+GuiLoadingScreenState InitGuiLoadingScreen(void);
+void GuiLoadingScreen(GuiLoadingScreenState *state);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUI_LOADINGSCREEN_H
+
+/***********************************************************************************
+*
+*   GUI_LOADINGSCREEN IMPLEMENTATION
+*
+************************************************************************************/
+#if defined(GUI_LOADINGSCREEN_IMPLEMENTATION)
+
+#include "raygui.h"
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Internal Module Functions Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+GuiLoadingScreenState InitGuiLoadingScreen(void)
+{
+    GuiLoadingScreenState state = { 0 };
+
+    // Init anchors
+    state.anchor01 = (Vector2){ 24, 24 };            // ANCHOR ID:1
+    
+    // Initialize controls variables
+    state.ProgressBar000Value = 0.0f;            // ProgressBar: ProgressBar000
+    state.ScrollPanel002ScrollView = (Rectangle){ 0, 0, 0, 0 };
+    state.ScrollPanel002ScrollOffset = (Vector2){ 0, 0 };
+    state.ScrollPanel002BoundsOffset = (Vector2){ 0, 0 };            // ScrollPanel: ScrollPanel002
+
+    // Init controls rectangles
+    state.layoutRecs[0] = (Rectangle){ state.anchor01.x + 192, state.anchor01.y + 576, 672, 24 };// ProgressBar: ProgressBar000
+    state.layoutRecs[1] = (Rectangle){ state.anchor01.x + 48, state.anchor01.y + 72, 984, 480 };// ScrollPanel: ScrollPanel002
+    state.layoutRecs[2] = (Rectangle){ state.anchor01.x + 0, state.anchor01.y + 0, 1080, 632 };// GroupBox: GroupBox003
+    state.layoutRecs[3] = (Rectangle){ state.anchor01.x + 72, state.anchor01.y + 24, 120, 24 };// Label: Label003
+
+    // Custom variables initialization
+
+    return state;
+}
+
+
+void GuiLoadingScreen(GuiLoadingScreenState *state) // Draws the loading-screen controls from *state; the scroll panel writes its scroll offset and view back into it
+{
+    // Constant captions for the controls
+    const char *ProgressBar000Text = "";    // PROGRESSBAR: ProgressBar000
+    const char *GroupBox003Text = "Loading Screen";    // GROUPBOX: GroupBox003
+    const char *Label003Text = "Debug Console";    // LABEL: Label003
+    
+    // Draw controls
+    GuiProgressBar(state->layoutRecs[0], ProgressBar000Text, NULL, &state->ProgressBar000Value, 0, 1); // value is shown against the range 0..1
+    GuiScrollPanel((Rectangle){state->layoutRecs[1].x, state->layoutRecs[1].y, state->layoutRecs[1].width - state->ScrollPanel002BoundsOffset.x, state->layoutRecs[1].height - state->ScrollPanel002BoundsOffset.y }, NULL, state->layoutRecs[1], &state->ScrollPanel002ScrollOffset, &state->ScrollPanel002ScrollView); // fixed: the generated code passed an undeclared ScrollPanel002Text; the panel has no caption, so pass NULL
+    GuiGroupBox(state->layoutRecs[2], GroupBox003Text);
+    GuiLabel(state->layoutRecs[3], Label003Text);
+}
+
+#endif // GUI_LOADINGSCREEN_IMPLEMENTATION
diff --git a/source/layout/gui_MainMenu.h b/source/layout/gui_MainMenu.h
new file mode 100644
index 0000000..4d5dfab
--- /dev/null
+++ b/source/layout/gui_MainMenu.h
@@ -0,0 +1,149 @@
+/*******************************************************************************************
+*
+*   MainMenu v1.0.0 - Tool Description
+*
+*   MODULE USAGE:
+*       #define GUI_MAINMENU_IMPLEMENTATION
+*       #include "gui_MainMenu.h"
+*
+*       INIT: GuiMainMenuState state = InitGuiMainMenu();
+*       DRAW: GuiMainMenu(&state);
+*
+*   LICENSE: Proprietary License
+*
+*   Copyright (c) 2022 SleepeeSoftware. All Rights Reserved.
+*
+*   Unauthorized copying of this file, via any medium is strictly prohibited
+*   This project is proprietary and confidential unless the owner allows
+*   usage in any other form by expressly written permission.
+*
+**********************************************************************************************/
+
+#include "raylib.h"
+
+// WARNING: raygui implementation is expected to be defined before including this header
+#undef RAYGUI_IMPLEMENTATION
+#include "raygui.h"
+
+#include <string.h>     // Required for: strcpy()
+
+#ifndef GUI_MAINMENU_H
+#define GUI_MAINMENU_H
+
+typedef struct {
+    // Define anchors
+    
+    // Define controls variables
+    bool Toggle001Active;            // Toggle: Toggle001
+
+    // Define rectangles
+    Rectangle layoutRecs[4];
+
+    // Custom state variables (depend on development software)
+    // NOTE: These variables should be added manually if required
+
+} GuiMainMenuState;
+
+#ifdef __cplusplus
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+// ...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+GuiMainMenuState InitGuiMainMenu(void);
+void GuiMainMenu(GuiMainMenuState *state);
+static void Button001();                // Button: Button001 logic
+static void Button002();                // Button: Button002 logic
+static void Button004();                // Button: Button004 logic
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUI_MAINMENU_H
+
+/***********************************************************************************
+*
+*   GUI_MAINMENU IMPLEMENTATION
+*
+************************************************************************************/
+#if defined(GUI_MAINMENU_IMPLEMENTATION)
+
+#include "raygui.h"
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Internal Module Functions Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+GuiMainMenuState InitGuiMainMenu(void)
+{
+    GuiMainMenuState state = { 0 };
+
+    // Init anchors
+    
+    // Initialize controls variables
+    state.Toggle001Active = true;            // Toggle: Toggle001
+
+    // Init controls rectangles
+    state.layoutRecs[0] = (Rectangle){ 120, 456, 144, 48 };// Toggle: Toggle001
+    state.layoutRecs[1] = (Rectangle){ 432, 264, 144, 48 };// Button: Button001
+    state.layoutRecs[2] = (Rectangle){ 120, 288, 144, 48 };// Button: Button002
+    state.layoutRecs[3] = (Rectangle){ 768, 456, 144, 48 };// Button: Button004
+
+    // Custom variables initialization
+
+    return state;
+}
+// Button: Button001 logic
+static void Button001()
+{
+    // TODO: Implement control logic
+}
+// Button: Button002 logic
+static void Button002()
+{
+    // TODO: Implement control logic
+}
+// Button: Button004 logic
+static void Button004()
+{
+    // TODO: Implement control logic
+}
+
+
+void GuiMainMenu(GuiMainMenuState *state)
+{
+    // Const text
+    const char *Toggle001Text = "Debug Toggle";    // TOGGLE: Toggle001
+    const char *Button001Text = "Play";    // BUTTON: Button001
+    const char *Button002Text = "Settings";    // BUTTON: Button002
+    const char *Button004Text = "Shutdown";    // BUTTON: Button004
+    
+    // Draw controls
+    GuiToggle(state->layoutRecs[0], Toggle001Text, &state->Toggle001Active);
+    if (GuiButton(state->layoutRecs[1], Button001Text)) Button001(); 
+    if (GuiButton(state->layoutRecs[2], Button002Text)) Button002(); 
+    if (GuiButton(state->layoutRecs[3], Button004Text)) Button004(); 
+}
+
+#endif // GUI_MAINMENU_IMPLEMENTATION
diff --git a/source/layout/gui_inventory.h b/source/layout/gui_inventory.h
new file mode 100644
index 0000000..c159e87
--- /dev/null
+++ b/source/layout/gui_inventory.h
@@ -0,0 +1,127 @@
+/*******************************************************************************************
+*
+*   Inventory v1.0.0 - Tool Description
+*
+*   MODULE USAGE:
+*       #define GUI_INVENTORY_IMPLEMENTATION
+*       #include "gui_inventory.h"
+*
+*       INIT: GuiInventoryState state = InitGuiInventory();
+*       DRAW: GuiInventory(&state);
+*
+*   LICENSE: Proprietary License
+*
+*   Copyright (c) 2022 SleepeeSoftware. All Rights Reserved.
+*
+*   Unauthorized copying of this file, via any medium is strictly prohibited
+*   This project is proprietary and confidential unless the owner allows
+*   usage in any other form by expressly written permission.
+*
+**********************************************************************************************/
+
+#include "raylib.h"
+
+// WARNING: raygui implementation is expected to be defined before including this header
+#undef RAYGUI_IMPLEMENTATION
+#include "raygui.h"
+
+#include <string.h>     // Required for: strcpy()
+
+#ifndef GUI_INVENTORY_H
+#define GUI_INVENTORY_H
+
+typedef struct {
+    // Anchor point; InitGuiInventory() positions every rectangle relative to it
+    Vector2 anchor01;            // ANCHOR ID:1
+    
+    // Control variables: this layout declares none
+
+    // Control bounds: [0] Inventory group box, [1] DataPanel, [2] DescriptionPanel
+    Rectangle layoutRecs[3];
+
+    // Custom state variables (depend on development software)
+    // NOTE: These variables should be added manually if required
+
+} GuiInventoryState;
+
+#ifdef __cplusplus
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+// ...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+GuiInventoryState InitGuiInventory(void);       // Returns a state with anchor01 and layoutRecs filled in
+void GuiInventory(GuiInventoryState *state);    // Draws the group box and two panels from state->layoutRecs
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUI_INVENTORY_H
+
+/***********************************************************************************
+*
+*   GUI_INVENTORY IMPLEMENTATION
+*
+************************************************************************************/
+#if defined(GUI_INVENTORY_IMPLEMENTATION)
+
+#include "raygui.h"
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Internal Module Functions Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+// Create the inventory layout state. anchor01 is fixed at (840, 72); the group
+// box starts at the anchor and both panels start 264 units to its left.
+GuiInventoryState InitGuiInventory(void)
+{
+    const Vector2 origin = { 840, 72 };    // ANCHOR ID:1
+    GuiInventoryState state = { 0 };
+
+    state.anchor01 = origin;
+
+    // GroupBox: Inventory
+    state.layoutRecs[0] = (Rectangle){ origin.x, origin.y, 240, 528 };
+    // Panel: DataPanel
+    state.layoutRecs[1] = (Rectangle){ origin.x - 264, origin.y + 48, 240, 192 };
+    // Panel: DescriptionPanel
+    state.layoutRecs[2] = (Rectangle){ origin.x - 264, origin.y + 264, 240, 216 };
+
+    return state;
+}
+
+
+// Draw the inventory layout: the "Void Bag" group box and two untitled panels.
+void GuiInventory(GuiInventoryState *state)
+{
+    const char *InventoryText = "Void Bag";    // GROUPBOX: Inventory
+    // Panels take NULL text: this layout defines no title strings for them.
+    // NOTE(review): supply real captions here if titled panels are wanted.
+    GuiGroupBox(state->layoutRecs[0], InventoryText);
+    GuiPanel(state->layoutRecs[1], NULL);    // PANEL: DataPanel
+    GuiPanel(state->layoutRecs[2], NULL);    // PANEL: DescriptionPanel
+}
+
+#endif // GUI_INVENTORY_IMPLEMENTATION
diff --git a/source/layout/gui_settings.h b/source/layout/gui_settings.h
new file mode 100644
index 0000000..826bcc6
--- /dev/null
+++ b/source/layout/gui_settings.h
@@ -0,0 +1,140 @@
+/*******************************************************************************************
+*
+*   Settings v1.0.0 - Tool Description
+*
+*   MODULE USAGE:
+*       #define GUI_SETTINGS_IMPLEMENTATION
+*       #include "gui_settings.h"
+*
+*       INIT: GuiSettingsState state = InitGuiSettings();
+*       DRAW: GuiSettings(&state);
+*
+*   LICENSE: Proprietary License
+*
+*   Copyright (c) 2022 SleepeeSoftware. All Rights Reserved.
+*
+*   Unauthorized copying of this file, via any medium is strictly prohibited
+*   This project is proprietary and confidential unless the owner allows
+*   usage in any other form by expressly written permission.
+*
+**********************************************************************************************/
+
+#include "raylib.h"
+
+// WARNING: raygui implementation is expected to be defined before including this header
+#undef RAYGUI_IMPLEMENTATION
+#include "raygui.h"
+
+#include <string.h>     // Required for: strcpy()
+
+#ifndef GUI_SETTINGS_H
+#define GUI_SETTINGS_H
+
+typedef struct {
+    // Anchor point; InitGuiSettings() positions the group boxes relative to it
+    Vector2 anchor01;            // ANCHOR ID:1
+    
+    // Control variables
+    int ToggleGroup001Active;            // ToggleGroup: ToggleGroup001 (passed by pointer to GuiToggleGroup)
+
+    // Control bounds: [1] is the toggle group, [0] and [2]..[5] are group boxes
+    Rectangle layoutRecs[6];
+
+    // Custom state variables (depend on development software)
+    // NOTE: These variables should be added manually if required
+
+} GuiSettingsState;
+
+#ifdef __cplusplus
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+// ...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+GuiSettingsState InitGuiSettings(void);       // Returns a state with anchor01, ToggleGroup001Active and layoutRecs set
+void GuiSettings(GuiSettingsState *state);    // Draws the group boxes and toggle group from state->layoutRecs
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUI_SETTINGS_H
+
+/***********************************************************************************
+*
+*   GUI_SETTINGS IMPLEMENTATION
+*
+************************************************************************************/
+#if defined(GUI_SETTINGS_IMPLEMENTATION)
+
+#include "raygui.h"
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Internal Module Functions Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+// Create the settings layout state: anchor01 at (96, 96), ToggleGroup001Active
+// set to 0, and six control rectangles (five identical group boxes plus the
+// toggle group).
+GuiSettingsState InitGuiSettings(void)
+{
+    GuiSettingsState state = { 0 };
+
+    state.anchor01 = (Vector2){ 96, 96 };    // ANCHOR ID:1
+    state.ToggleGroup001Active = 0;          // ToggleGroup: ToggleGroup001
+
+    // GroupBox000 and GroupBox002..GroupBox005 all share one rectangle that
+    // starts at the anchor, so fill every slot with it first.
+    const Rectangle groupBounds = { state.anchor01.x + 0, state.anchor01.y + 0, 840, 504 };
+    for (int i = 0; i < 6; i++) {
+        state.layoutRecs[i] = groupBounds;
+    }
+
+    // Slot 1 is the toggle group; its position does not use the anchor.
+    state.layoutRecs[1] = (Rectangle){ 264, 48, 128, 32 };
+
+    return state;
+}
+
+
+// Draw the settings layout for one frame, in this order: GroupBox000, the
+// toggle group (handed a pointer to state->ToggleGroup001Active), then
+// GroupBox002 through GroupBox005.
+void GuiSettings(GuiSettingsState *state)
+{
+    // Every group box still carries the same placeholder caption
+    const char *placeholder = "SAMPLE TEXT";
+    const char *tabNames = "Gameplay; Sound; Graphic; Control; SECRET";    // TOGGLEGROUP: ToggleGroup001
+
+    GuiGroupBox(state->layoutRecs[0], placeholder);    // GROUPBOX: GroupBox000
+
+    GuiToggleGroup(state->layoutRecs[1], tabNames, &state->ToggleGroup001Active);
+
+    // GROUPBOX: GroupBox002 .. GroupBox005
+    for (int i = 2; i <= 5; i++) {
+        GuiGroupBox(state->layoutRecs[i], placeholder);
+    }
+}
+
+#endif // GUI_SETTINGS_IMPLEMENTATION