diff --git a/README.md b/README.md
index 86ba79e..0897f77 100644
--- a/README.md
+++ b/README.md
@@ -4,23 +4,25 @@
 
 What is it?
 -----
-openlut is, at its core, a color management library, accessible from **Python 3.5+**. It's built on my own color pipeline needs, which includes managing
+openlut is, at its core, a transform-focused color management library, accessible from **Python 3.5+**. It's built on my own color pipeline needs, which includes managing
 Lookup Tables, Gamma/Gamut functions/matrices, applying color transformations, etc. .
 
-openlut is also a tool. Included soon will be a command line utility letting you perform complex color transformations from the comfort of
-your console. In all cases, interactive usage from a Python console is easy.
+openlut is also a practical tool. Included soon will be a command line utility letting you perform complex color transformations from the comfort of
+your console. Included already is an OpenGL image viewer, which might grow in the future to play sequences.
 
-I wanted it to cover this niche simply and consistently, something color management often isn't! Take a look; hopefully you'll agree :) !
+I wanted it to cover this niche simply and consistently, with batteries included (a library of gamma functions and color gamut matrices).
+Color management doesn't have to be so difficult!
 
 
 What About OpenColorIO? Why does this exist?
 ------
-OpenColorIO is a wonderful library, but seems geared towards managing the complexity of many larger applications in a greater pipeline.
-openlut is more simple; it doesn't care about the big picture - you just do consistent operations on images. openlut also has tools to deal
-with these building blocks, unlike OCIO - resizing LUTs, etc. .
+OpenColorIO does amazing work - but mostly in the context of large applications, non-trivial config files, and self-defined color spaces
+(with the full range of int/float bit depth specifics, etc.).
 
-Indeed, OCIO is just a system these basic operations using LUTs - in somewhat unintuitive ways, in my opinion. You could setup a similar system
-using openlut's toolkit.
+openlut is all about images and the transforms on images. Everything happens in (0, 1) float space. Large emphasis is placed on managing the 
+tools themselves as well - composing matrices, resizing LUTs, defining new gamma functions, etc. .
+
+In many ways, OCIO is a system stringing basic operations together. It'd be perfectly plausible to write an OCIO alternative with openlut in the backend.
 
 
 Installation
@@ -33,9 +35,9 @@ Simply use pip: `sudo pip3 install openlut` (pip3 denotes that you must use a Py
 
 *If it's breaking, try running `sudo pip3 install -U pip setuptools`. Sometimes they are out of date.*
 
-Installing Dependencies
+Installing Compile Dependencies
 -----
-Not Difficult, I promise!
+For the moment, I don't have a Mac wheel. Not Difficult, I promise!
 
 On Debian/Ubuntu: `sudo apt-get install python3-pip gcc pybind11-dev libmagickwand-dev`
 On Mac: `brew install python3 gcc pybind11 imagemagick`
diff --git a/interp.py b/interp.py
new file mode 100644
index 0000000..550afde
--- /dev/null
+++ b/interp.py
@@ -0,0 +1,9 @@
+from functools import reduce
+from imp import reload
+
+import openlut as ol
+from openlut.lib.files import Log
+
+img = ol.ColMap.open('img_test/rock.exr')
+fSeq = img.rgbArr
+lut = ol.LUT.lutFunc(ol.gamma.sRGB)
diff --git a/openlut/ColMap.py b/openlut/ColMap.py
index 8be04b0..bb15021 100644
--- a/openlut/ColMap.py
+++ b/openlut/ColMap.py
@@ -86,7 +86,6 @@ class ColMap :
 		with wand.image.Image(blob=binData, format=fmt, width=width, height=height) as img:
 			return ColMap.fromIntArray(np.fromstring(img.make_blob("RGB"), dtype='uint{}'.format(img.depth)).reshape(img.height, img.width, 3))
 	
-	@staticmethod
 	def toBinary(self, fmt, depth=16) :
 		'''
 		Using Wand blob functionality
@@ -95,7 +94,6 @@ class ColMap :
 			img.format = fmt
 			return img.make_blob()
 	
-	@staticmethod
 	def save(self, path, compress = None, depth = None) :
 		'''
 		Save the image. The filetype will be inferred from the path, and the appropriate backend will be used.
@@ -140,7 +138,7 @@ class ColMap :
 	#Display Functions
 	
 	@staticmethod
-	def display(path, width = 1200) :
+	def display(path, width = 1000) :
 		'''
 		Shows an image at a path without making a ColMap.
 		'''
@@ -153,7 +151,7 @@ class ColMap :
 		
 		Viewer.run(img, xRes, yRes, title = os.path.basename(path))
 		
-	def show(self, width = 1200) :
+	def show(self, width = 1000) :
 		#Use my custom OpenGL viewer!
 		Viewer.run(self.rgbArr, width, int(width * self.rgbArr.shape[0]/self.rgbArr.shape[1]))
 		
diff --git a/openlut/ColMat.py b/openlut/ColMat.py
index a91c9cd..bc99592 100644
--- a/openlut/ColMat.py
+++ b/openlut/ColMat.py
@@ -6,6 +6,7 @@ import numpy as np
 #~ import numba
 
 from .Transform import Transform
+from .lib import olOpt as olo
 
 class ColMat(Transform) :
 	def __init__(self, *mats) :
@@ -21,66 +22,24 @@ class ColMat(Transform) :
 			else :
 				self.mat = np.array(mat) #Simply set self.mat with the numpy array version of the mat.
 		elif len(mats) > 1 :
-			self.mat = ColMat.__mats(*[ColMat(mat) for mat in mats]).mat
+			self.mat = ColMat._mats(*[ColMat(mat) for mat in mats]).mat
 		elif not mats :
 			self.mat = np.identity(3)
 		
-	def __mats(*inMats) :
+	def _mats(*inMats) :
 		'''
-		Initialize a combined Transform matrix from several input ColMats.
+		Initialize a combined Transform matrix from several input ColMats. Use constructor instead.
 		'''
 		return ColMat(reduce(ColMat.__mul__, reversed(inMats))) #Works because multiply is actually non-commutative dot.
 		#This is why we reverse inMats.
-		
-	#~ @numba.jit(nopython=True)
-	def __optDot(img, mat, shp, out) :
-		'''
-		Dots the matrix with each tuple of colors in the img.
-
-		img: Numpy array of shape (height, width, 3).
-		mat: The 3x3 numpy array representing the color transform matrix.
-		shp: The shape of the image.
-		out: the output list. Built mutably for numba's sake.
-		'''
-		shaped = img.reshape((shp[0] * shp[1], shp[2])) #Flatten to 2D array for iteration over colors.
-		i = 0
-		while i < shp[0] * shp[1] :
-			res = np.dot(mat, shaped[i])
-			out[i] = res
-			i += 1
-			
-	def __applMat(q, cpu, shp, mat, img3D) :
-		out = np.zeros((shp[0] * shp[1], shp[2]))
-		ColMat.__optDot(img3D, mat, shp, out)
-		q.put( (cpu, out.reshape(shp)) )
 	
 	def sample(self, fSeq) :
 		shp = np.shape(fSeq)
 		if len(shp) == 1 :
 			return self.mat.dot(fSeq)
 		if len(shp) == 3 :
-			cpus = mp.cpu_count()
-			out = []
-			q = mp.Queue()
-			splt = Transform.spSeq(fSeq, cpus)
-			for cpu in range(cpus) :
-				p = mp.Process(target=ColMat.__applMat, args=(q, cpu, np.shape(splt[cpu]), self.mat, splt[cpu]))
-				p.start()
-				
-			for num in range(len(splt)) :
-				out.append(q.get())
-				
-			return np.concatenate([seq[1] for seq in sorted(out, key=lambda seq: seq[0])], axis=0)
-			
-			#~ out = np.zeros((shp[0] * shp[1], shp[2]))
-			#~ ColMat.__optDot(fSeq, self.mat, shp, out)
-			#~ return out.reshape(shp)
-			
-			#~ return np.array([self.mat.dot(col) for col in fSeq.reshape(shp[0] * shp[1], shp[2])]).reshape(shp)
-			
-			#~ p = mp.Pool(mp.cpu_count())
-			#~ return np.array(list(map(self.mat.dot, fSeq.reshape(shp[0] * shp[1], shp[2])))).reshape(shp)
-		#~ return fSeq.dot(self.mat)
+			#C++ based olo.matr replaces & sped up the operation by 50x with same output!!!
+			return olo.matr(fSeq.reshape(reduce(lambda a, b: a*b, fSeq.shape)), self.mat.reshape(reduce(lambda a, b: a*b, self.mat.shape))).reshape(fSeq.shape)
 		
 	def inv(obj) :
 		if isinstance(obj, ColMat) : #Works on any ColMat object - including self.
diff --git a/openlut/LUT.py b/openlut/LUT.py
index 1f8c9c8..1170367 100644
--- a/openlut/LUT.py
+++ b/openlut/LUT.py
@@ -15,7 +15,7 @@ from .Transform import Transform
 from .lib import olOpt as olo
 
 class LUT(Transform) :
-	def __init__(self, dims = 1, size = 16384, title = "openlut_LUT", iRange = (0.0, 1.0)) :	
+	def __init__(self, dims = 1, size = 4096, title = "openlut_LUT", iRange = (0.0, 1.0)) :	
 		'''
 		Create an identity LUT with given dimensions (1 or 3), size, and title.
 		'''	
@@ -33,7 +33,7 @@ class LUT(Transform) :
 			print("3D LUT Not Implemented!")
 			#~ self.array = np.linspace(self.range[0], self.range[1], self.size**3).reshape(self.size, self.size, self.size) #Should make an identity size x size x size array.
 		
-	def lutFunc(func, size = 16384, dims = 1, title="openlut_FuncGen", iRange = (0.0, 1.0)) :
+	def lutFunc(func, size = 4096, dims = 1, title="openlut_FuncGen", iRange = (0.0, 1.0)) :
 		'''
 		Creates a LUT from a simple function.
 		'''
@@ -69,11 +69,8 @@ class LUT(Transform) :
 		return LUT.lutArray(splev(np.linspace(0, 1, num=len(idArr)), splrep(idArr, mapArr)))
 		
 #LUT Functions.
-	def __interp(q, cpu, spSeq, ID, array, spl) :
-		if spl :
-			q.put( (cpu, splev(spSeq, splrep(ID, array))) ) #Spline Interpolation. Pretty quick, considering.
-		else :
-			q.put( (cpu, np.interp(spSeq, ID, array)) )
+	def _splInterp(q, cpu, spSeq, ID, array) :
+		q.put( (cpu, splev(spSeq, splrep(ID, array))) ) #Spline Interpolation. Pretty quick, considering.
 	
 	def sample(self, fSeq, spl=True) :
 		'''
@@ -91,21 +88,22 @@ class LUT(Transform) :
 				
 		fSeq = np.array(fSeq)
 		if self.dims == 1 :
-			#~ return np.interp(spSeq, self.ID, self.array)
-			
 			#If scipy isn't loaded, we can't use spline interpolation!
-			if (not MOD_SCIPY) or self.size > 1023: spl = False # Auto-adapts big LUTs to use the faster, more brute-forceish, linear interpolation.
-			out = []
-			q = mp.Queue()
-			splt = Transform.spSeq(fSeq, mp.cpu_count())
-			for cpu in range(mp.cpu_count()) :
-				p = mp.Process(target=LUT.__interp, args=(q, cpu, splt[cpu], self.ID, self.array, spl))
-				p.start()
-				
-			for num in range(len(splt)) :
-				out.append(q.get())
-				
-			return np.concatenate([seq[1] for seq in sorted(out, key=lambda seq: seq[0])], axis=0)
+			if (not MOD_SCIPY) or self.size > 25 : # Auto-adapts all but the smallest LUTs to use the faster linear interpolation.
+				return olo.lut1dlin(fSeq.reshape(reduce(lambda a, b: a*b, fSeq.shape)), self.array, self.range[0], self.range[1]).reshape(fSeq.shape)
+			else :
+				#~ return np.interp(spSeq, self.ID, self.array) #non-threaded way.
+				out = []
+				q = mp.Queue()
+				splt = Transform.spSeq(fSeq, mp.cpu_count())
+				for cpu in range(mp.cpu_count()) :
+					p = mp.Process(target=LUT._splInterp, args=(q, cpu, splt[cpu], self.ID, self.array))
+					p.start()
+					
+				for num in range(len(splt)) :
+					out.append(q.get())
+					
+				return np.concatenate([seq[1] for seq in sorted(out, key=lambda seq: seq[0])], axis=0)
 			
 		elif self.dims == 3 :
 			print("3D LUT Not Implemented!")
diff --git a/openlut/Viewer.py b/openlut/Viewer.py
index 13ca3ca..f7cc4d1 100644
--- a/openlut/Viewer.py
+++ b/openlut/Viewer.py
@@ -1,10 +1,20 @@
+import multiprocessing as mp
+
+#Future: Use GLFW
 import pygame
 from pygame.locals import *
 
+import numpy as np
+
 MOD_OPENGL = True
 try :
 	from OpenGL.GL import *
+	from OpenGL.GL.shaders import compileShader,ShaderProgram
 	from OpenGL.GLU import *
+	from OpenGL.arrays import vbo #This is a class that makes it easy to use Vertex Buffer Objects.
+	from OpenGL.GL.framebufferobjects import *
+	from OpenGL.GL.EXT.framebuffer_object import *
+	#~ from OpenGLContext.arrays import *
 except :
 	print('Unable to load OpenGL. Make sure your graphics drivers are installed & up to date!')
 	MOD_OPENGL = False
@@ -16,40 +26,94 @@ class Viewer :
 	def __init__(self, res, title="OpenLUT Image Viewer") :
 		self.res = res
 		
+		#Vertex shaders calculate vertex positions - gl_position, which is a vec4.
+		#In our case, this vec4 is on a ortho projected square in front of the screen.
+		#~ self.shaderVertex = compileShader("""#version 330 core
+#~ layout (location = 0) in vec2 position;
+#~ layout (location = 1) in vec2 texCoords;
+
+#~ out vec2 TexCoords;
+
+#~ void main()
+#~ {
+    #~ gl_Position = vec4(position.x, position.y, 0.0f, 1.0f); 
+    #~ TexCoords = texCoords;
+#~ }  
+#~ """, GL_VERTEX_SHADER )
+		
+		#After a vertex is processed, clipping happens, etc. Then frag shader.
+		#Fragment shaders make "fragments" - pixels, subpixels, hidden stuff, etc. . They can do per pixel stuff.
+		#Goal: Make gl_FragColor, the color of the fragment. It's a vec4.
+		#In this case, we're sampling the texture coordinates.
+		#~ self.shaderFrag = compileShader("""#version 330 core
+#~ in vec2 TexCoords;
+#~ out vec4 color;
+
+#~ uniform sampler2D screenTexture;
+
+#~ void main()
+#~ { 
+    #~ color = texture(screenTexture, TexCoords);
+#~ }
+#~ """, GL_FRAGMENT_SHADER )
+
+		#Convenience for glCreateProgram, then attaches each shader via pointer, links with glLinkProgram,
+		#validates with glValidateProgram and glGetProgramiv, then cleanup & return shader program.
+		#~ self.shader = Viewer.shaderProgramCompile(self.shaderVertex, self.shaderFrag)
+		#~ self.vbo = self.bindVBO()
+				
+		#Init pygame in OpenGL double-buffered mode.
 		pygame.init()
 		pygame.display.set_caption(title)
-		pygame.display.set_mode(res, DOUBLEBUF|OPENGL)
+		pygame.display.set_mode((res), DOUBLEBUF|OPENGL)
 		
+		#Initialize OpenGL.
 		self.initGL()
-				
+		
+	def shaderProgramCompile(*shaders) :
+		prog = glCreateProgram()
+		for shader in shaders :
+			glAttachShader(prog, shader)
+		prog = ShaderProgram(prog)
+		glLinkProgram(prog)
+		return prog
+		
 	def initGL(self) :
 		'''
 		Initialize OpenGL.
 		'''
+		#Start up OpenGL in Ortho projection mode.
 		glEnable(GL_TEXTURE_2D)
 		
+		glViewport(0, 0, self.res[0], self.res[1])
+		
 		glMatrixMode(GL_PROJECTION)
 		glLoadIdentity()
 		glOrtho(0, self.res[0], self.res[1], 0, 0, 100)
 		
 		glMatrixMode(GL_MODELVIEW)
+		#~ glUseProgram(self.shader)
 		
-		#~ glClearColor(0, 0, 0, 0)
-		#~ glClearDepth(0)
-		#~ glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT)
+	def resizeWindow(self, newRes) :
+		#~ print(newRes)
+		self.res = newRes
+		pygame.display.set_mode(self.res, DOUBLEBUF|OPENGL)
+		glViewport(0, 0, self.res[0], self.res[1]) #Reset viewport
 		
-	#~ def resizeWindow(self, newRes) :
-		#~ self.res = newRes
-		#~ pygame.display.set_mode(self.res, RESIZABLE|DOUBLEBUF|OPENGL)
-		##~ glLoadIdentity()
-		##~ glOrtho(0, self.res[0], self.res[1], 0, 0, 100)
+		glMatrixMode(GL_PROJECTION) #Modify projection matrix
+		glLoadIdentity() #Load in identity matrix
+		glOrtho(0, self.res[0], self.res[1], 0, 0, 100) #New projection matrix
 		
-		##~ glMatrixMode(GL_MODELVIEW)
-	
-	def drawQuad(self) :
+		glMatrixMode(GL_MODELVIEW) #Switch back to model matrix.
+		glLoadIdentity() #Load an identity matrix into the model-view matrix
+		
+		#~ pygame.display.flip()
+				
+	def drawImage(self) :
 		'''
 		Draws an image to the screen.
 		'''
+		#~ print("\r", self.res, end="", sep="")
 		glBegin(GL_QUADS)
 		
 		glTexCoord2i(0, 0)
@@ -66,40 +130,100 @@ class Viewer :
 		
 		glEnd()
 		
-	def bindTex(self, img)	:
+	def bindVBO(self, verts=np.array([[0,1,0],[-1,-1,0],[1,-1,0]], dtype='f')) :
+		vertPos = vbo.VBO(verts)
+		
+		indices = np.array([[0, 1, 2]], dtype=np.int32)
+		indPos = vbo.VBO(indices, target=GL_ELEMENT_ARRAY_BUFFER)
+		
+		return (vertPos, indPos)
+		
+	def bindFBO(self) :
+		'''
+		Create and bind a framebuffer for rendering (loading images) to.
+		'''
+		
+		fbo = glGenFramebuffers(1) #Create framebuffer
+		
+		#Binding it makes the next read and write framebuffer ops affect the bound framebuffer.
+		#You can also bind it specifically to read/write targets. GL_READ_FRAMEBUFFER and GL_DRAW_FRAMEBUFFER.
+		glBindFramebuffer(GL_FRAMEBUFFER, fbo)
+		
+		#It needs 1+ same sampled buffers (color, depth, stencil) and a "complete" color attachment.
+		
+		#Create a texture to render to. Empty for now; size is screen size.
+		tex = self.bindTex(None, res=self.res) #Fill it up with nothing, for now. It's our color attachment.
+		
+		glBindTexture(GL_TEXTURE_2D, 0)
+		
+		#Target is framebuffer, attachment is color, textarget is 2D texture, the texture is tex, the mipmap level is 0.
+		#We attach the texture to the frame buffer.
+		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, tex, 0)
+		
+		#Renderbuffers are write-only; can't be sampled, just displayed. Often used as depth and stencil. So useless here :).
+		
+		if glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE :
+			print("Framebuffer not complete!")
+			
+		glBindFramebuffer(GL_FRAMEBUFFER, 0); #Finally - bind the framebuffer!
+		
+		#Framebuffer 0 is the default (window) framebuffer, so rendering targets the screen again until fbo is re-bound.
+		
+		return fbo
+		
+				
+	def bindTex(self, img, res=None)	:
 		'''
 		Binds the image contained the numpy float array img to a 2D texture on the GPU.
 		'''
-		id = glGenTextures(1)
+		if not res: res = img.shape
+		
+		tex = glGenTextures(1)
 		
 		glPixelStorei(GL_UNPACK_ALIGNMENT, 1)
-		glBindTexture(GL_TEXTURE_2D, id)
+		glBindTexture(GL_TEXTURE_2D, tex)
 		
-		glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)
-		glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)
-		glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)
+		glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP) #Clamp to edge
+		glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP)
+		glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR) #Mag/Min Interpolation
 		glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)
 		
-		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, img.shape[1], img.shape[0], 0, GL_RGB, GL_FLOAT, img)
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, res[1], res[0], 0, GL_RGB, GL_FLOAT, img)
 		
-	def display(self) :
+		return tex
+		
+	def display(self, fbo = 1, tex = 1) :
 		'''
 		Repaints the window.
 		'''
 		
-		#Clears the "canvas"
-		glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT)
+		#Here, we do things to the framebuffer. Not the screen. Important.
+		#~ glBindFramebuffer(GL_FRAMEBUFFER, fbo)
+		#~ glClearColor(0, 0, 0, 1.0)
+		glClear(GL_COLOR_BUFFER_BIT)
 		glMatrixMode(GL_MODELVIEW)
 		
-		#Maybe do them here.
+		#This render is rendering to framebuffer
 		glEnable(GL_TEXTURE_2D)
-		self.drawQuad()
+		self.drawImage()
+		
+		#~ #Back to the screen.
+		#~ glBindFramebuffer(GL_FRAMEBUFFER, 0)
+		#~ glClearColor(0, 1, 1, 1)
+		#~ glClear(GL_COLOR_BUFFER_BIT)
+		
+		#~ glUseProgram(self.shader)
+		#~ glBindTexture(GL_TEXTURE_2D, tex)
+		
+		#~ glBindVertexArray(0)
+		#~ glUseProgram(0)
 		
 		#Updates the display.
 		pygame.display.flip()
 		
 	def close() :
-		#~ print()
+		print()
+		#~ glUseProgram(0)
 		pygame.quit()
 		
 	def run(img, xRes, yRes, title = "OpenLUT Image Viewer") :
@@ -109,28 +233,31 @@ class Viewer :
 		if not MOD_OPENGL: print("OpenGL not enabled. Viewer won't start."); return
 		
 		v = Viewer((xRes, yRes), title)
-		v.bindTex(img)
+		gpuImg = v.bindTex(img)
+		#~ gpuBuf = v.bindFBO()
 		
 		FPS = None
 		clock = pygame.time.Clock()
 		
-		while True :
+		while True :			
 			for event in pygame.event.get() :
 				if event.type == pygame.QUIT: Viewer.close(); break
 				
-				#~ if event.type == pygame.VIDEORESIZE :
-					#~ v.resizeWindow((event.w, event.h))
+				if event.type == pygame.VIDEORESIZE :
+					v.resizeWindow((event.w, event.h))
 					
 				if event.type == pygame.KEYDOWN :
+					if str(event.key) == "27": Viewer.close(); break #Need to catch ESC to close the window.
+					
 					try :
 						{
 							
 						}[event.key]()
 					except KeyError as key :
-						if str(key) == "27": Viewer.close(); break #Need to catch ESC to close the window.
 						print("Key not mapped!")
 			else :
 				#This else will only run if the event loop is completed.
+				#~ v.display(fbo = gpuBuf, tex = gpuImg)
 				v.display()
 				
 				#Smooth playback at FPS.
diff --git a/openlut/__init__.py b/openlut/__init__.py
index 95a9b7b..9ef09d4 100644
--- a/openlut/__init__.py
+++ b/openlut/__init__.py
@@ -6,5 +6,9 @@ from .Func import Func
 from .ColMat import ColMat
 from .Viewer import Viewer
 
+#Ensure the package namespace lines up.
+from . import gamma
+from . import gamut
+
 __all__ = ['ColMap', 'Transform', 'LUT', 'Func', 'ColMat', 'Viewer', 'gamma', 'gamut']
 
diff --git a/openlut/lib/files.py b/openlut/lib/files.py
index 1b32eb0..297bceb 100755
--- a/openlut/lib/files.py
+++ b/openlut/lib/files.py
@@ -19,6 +19,17 @@ Copyright 2016 Sofus Rose
 import sys, os, time
 import multiprocessing as mp
 
+import numpy as np
+
+MOD_MATPLOTLIB = False
+try:
+	import matplotlib.pyplot as plt
+	import matplotlib.mlab as mlab
+	
+	MOD_MATPLOTLIB = True
+except:
+	print("Matplotlib not installed. Graphs won't be drawn")
+
 class Files :
 	"""
 	The Files object is an immutable sequence of files, which supports writing simultaneously to all the files.
@@ -196,6 +207,41 @@ class Log(ColLib) :
 			return self.getLogTime() - self.sTimes[run]
 		else :
 			raise ValueError('Run wasn\'t found!!')
+			
+	@staticmethod
+	def bench(f, args=[], kwargs={}, trials=15, graph=False) :
+		def t(): l = Log(); l.startTime(0); f(*args, **kwargs); return l.getTime(0)
+		
+		data = np.array([t() for i in range(trials)])
+		anyl = {		'mean'		: np.mean(data),
+						'median'	: np.median(data),
+						'std_dev'	: np.std(data),
+						'vari'		: np.std(data) ** 2,
+						'total'		: sum(data)
+		}
+		
+		if graph: Log.graphBench(anyl)
+			
+		return anyl
+		
+	@staticmethod
+	def graphBench(anyl) :
+		if MOD_MATPLOTLIB :
+			fig = plt.figure()
+			
+			x = np.linspace(-3 * anyl['std_dev'] + anyl['mean'], 3 * anyl['std_dev'] + anyl['mean'], 100)
+			
+			plt.plot(x, mlab.normpdf(x, anyl['mean'], anyl['std_dev']))
+			
+			plt.axvline(x = anyl['mean'], color='red', linestyle = "--")
+			plt.text(	anyl['mean'] - 0.2 * anyl['std_dev'], 0, 'mean',
+						horizontalalignment = 'left', verticalalignment='bottom',
+						rotation = 90, fontsize=10, fontstyle='italic'
+			)
+			plt.xlabel('Time (Seconds)', fontsize=15)
+			plt.ylabel('Distribution', fontsize=11)
+			
+			plt.show()
 
 	def compItem(self, state, time, *text) :
 		"""
diff --git a/openlut/lib/olOpt.cpp b/openlut/lib/olOpt.cpp
index 175d664..83b8a04 100644
--- a/openlut/lib/olOpt.cpp
+++ b/openlut/lib/olOpt.cpp
@@ -10,6 +10,8 @@
 
 //~ #include "samplers.h"
 
+//~ #define EPSILON 0.0001
+
 namespace py = pybind11;
 using namespace std;
 
@@ -26,7 +28,6 @@ float sLog(float x) { return (0.432699 * log10(x + 0.037584) + 0.616596) + 0.03;
 float sLog2(float x) { return ( 0.432699 * log10( (155.0 * x) / 219.0 + 0.037584) + 0.616596 ) + 0.03; }
 float DanLog(float x) { return x > 0.1496582 ? (pow(10.0, ((x - 0.385537) / 0.2471896)) - 0.071272) / 3.555556 : (x - 0.092809) / 5.367655; }
 
-
 //gam lets the user pass in any 1D array, any one-arg C++ function, and get a result. It's multithreaded, vectorized, etc. .
 py::array_t<float> gam(py::array_t<float> arr, const std::function<float(float)> &g_func) {
 	py::buffer_info bufIn = arr.request();
@@ -54,6 +55,95 @@ py::array_t<float> gam(py::array_t<float> arr, const std::function<float(float)>
 }
 
 
+//lut1dlin takes a flattened image array and a flattened 1D LUT array, and returns a linearly interpolated result.
+py::array_t<float> lut1dlin(py::array_t<float> img, py::array_t<float> lut, float lBound, float hBound) {
+	py::buffer_info bufImg = img.request(), bufLUT = lut.request();
+	
+	//To use with an image, MAKE SURE to flatten the 3D array to a 1D array, then back out to a 3D array after.
+	if (bufImg.ndim == 1 && bufLUT.ndim == 1) {
+		//Make numpy allocate the buffer of the new array.
+		auto result = py::array_t<float>(bufImg.size);
+		
+		//Get the bufOut pointers that we can manipulate from C++.
+		auto bufOut = result.request();
+		
+		float 	*ptrImg = (float *) bufImg.ptr,
+				*ptrLUT = (float *) bufLUT.ptr,
+				*ptrOut = (float *) bufOut.ptr;
+		
+		//Iterate over flat array. Each value gets scaled according to the LUT.
+		#pragma omp parallel for
+		for (size_t i = 0; i < bufImg.shape[0]; i++) {
+			//~ std::cout << g_func(ptrImg[i]) << std::endl;
+			//~ std::cout << g_func(ptrImg[i]) << std::endl;
+			
+			float val = ptrImg[i];
+			
+			if (val <= lBound) { ptrOut[i] = ptrLUT[0]; continue; }
+			else if (val >= hBound) { ptrOut[i] = ptrLUT[bufLUT.shape[0] - 1]; continue; } //Some simple clipping. So it's safe to index.
+			
+			float lutVal = val * bufLUT.shape[0]; //Need the value in relation to LUT indices.
+			//Essentially, we're gonna index by this above with simple math.
+			
+			// Linear Interpolation: y = y0 + (x - x0) * ( (y1 - y0) / (x1 - x0) )
+			// See https://en.wikipedia.org/wiki/Linear_interpolation#Linear_interpolation_between_two_known_points .
+			// (x0, y0) is lower point, (x1, y1) is higher point.
+			int x0 = (int)floor(lutVal);
+			int x1 = (int)ceil(lutVal); //Internet says this is safe. Yay internet...
+			
+			float y0 = ptrLUT[x0];
+			float y1 = ptrLUT[x1];
+			
+			// (y1 - y0) is divided by the result of (float)(x1 - x0) - but no need to write it; a ceil'ed minus a floor'ed int is just 1.
+			ptrOut[i] = y0 + (lutVal - (float)x0) * ( (y1 - y0) );
+		}
+		
+		return result;
+	}
+}
+
+
+//matr takes a flattened image array and a flattened 3x3 matrix.
+py::array_t<float> matr(py::array_t<float> img, py::array_t<float> mat) {
+	py::buffer_info bufImg = img.request(), bufMat = mat.request();
+	
+	//To use with an image, MAKE SURE to flatten the 3D array to a 1D array, then back out to a 3D array after.
+	if (bufImg.ndim == 1 && bufMat.ndim == 1) {
+		//Make numpy allocate the buffer of the new array.
+		auto result = py::array_t<float>(bufImg.size);
+		
+		//Get the bufOut pointers that we can manipulate from C++.
+		auto bufOut = result.request();
+		
+		float 	*ptrImg = (float *) bufImg.ptr,
+				*ptrMat = (float *) bufMat.ptr,
+				*ptrOut = (float *) bufOut.ptr;
+		
+		//We flatly (parallelly) iterate by threes - r, g, b. To do matrix math. Yay!
+		#pragma omp parallel for
+		for (size_t i = 0; i < bufImg.shape[0]; i+=3) {
+			//~ std::cout << g_func(ptrImg[i]) << std::endl;
+			//~ std::cout << g_func(ptrImg[i]) << std::endl;
+			
+			/* Remember: We're dealing with a flattened matrix here. Indices for ptrMat:
+			*	0	1	2
+			*	3	4	5
+			* 	6	7	8
+			*/
+			
+			float	r = ptrImg[i],
+					g = ptrImg[i + 1],
+					b = ptrImg[i + 2];
+			
+			ptrOut[i] = r * ptrMat[0] + g * ptrMat[1] + b * ptrMat[2]; //Red
+			ptrOut[i + 1] = r * ptrMat[3] + g * ptrMat[4] + b * ptrMat[5]; //Green
+			ptrOut[i + 2] = r * ptrMat[6] + g * ptrMat[7] + b * ptrMat[8]; //Blue
+		}
+		
+		return result;
+	}
+}
+
 
 
 
@@ -62,9 +152,23 @@ PYBIND11_PLUGIN(olOpt) {
 	
 	mod.def(	"gam",
 				&gam,
-				"The sRGB function, vectorized."
+				"Apply any one-argument C++ function to a flattened numpy array; vectorized & parallel."
 	);
 	
+	mod.def(	"matr",
+				&matr,
+				"Apply any flattened color matrix to a flattened numpy image array; vectorized & parallel."
+	);
+	
+	mod.def(	"lut1dlin",
+				&lut1dlin,
+				"Apply any 1D LUT to a flattened numpy image array; vectorized & parallel."
+	);
+	
+	
+	
+	//Simple Gamma Functions
+	
 	mod.def(	"lin",
 				&lin,
 				"The linear function."
diff --git a/performance.txt b/performance.txt
new file mode 100644
index 0000000..b62d608
--- /dev/null
+++ b/performance.txt
@@ -0,0 +1,13 @@
+1080p image (rock.exr), preloaded into the ColMap img. Transform preloaded into the Transform tran. What's timed is the application with apply().
+
+The amount of time to apply each given Transform to a 1920*1080 Image on my 4 core (8 thread) CPU:
+
+apply(ol.LUT): 0.026462205679908948, (avg. 100 Trials) *sRGB LUT
+apply(ol.Func): 0.064781568400030659, (avg. 100 Trials) *C++ Function sRGB
+apply(ol.Func): 0.55080005893347939, (avg. 15 Trials) *Python Function sRGB
+apply(ol.ColMat): 0.019661276286992234, (avg. 1000 Trials)
+
+#OLD
+apply(ol.ColMat): 0.98610644233346345, (avg. 15 Trials) *ACES --> sRGB
+apply(ol.LUT): 0.15440896909999538, (avg. 100 Trials) *sRGB LUT
+
diff --git a/setup.py b/setup.py
index e94ce13..3b03a06 100755
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ from setuptools import find_packages
 #Better - Mac & Linux only.
 #~ pyPath = '/usr/local/include/python{}'.format(get_python_version())'
 
-cpp_args = ['-fopenmp', '-std=gnu++14']
+cpp_args = ['-fopenmp', '-std=gnu++14', '-O3']
 link_args = ['-fopenmp']
 
 olOpt = Extension(	'openlut.lib.olOpt',
@@ -27,7 +27,7 @@ olOpt = Extension(	'openlut.lib.olOpt',
 		)
 
 setup(	name = 'openlut',
-		version = '0.1.4',
+		version = '0.2.0',
 		description = 'OpenLUT is a practical color management library.',
 		author = 'Sofus Rose',
 		author_email = 'sofus@sofusrose.com',