glshim updated, added latest changes by ptitSeb

This commit is contained in:
lubomyr
2016-02-24 14:55:36 +02:00
parent 2c00167f4e
commit a460cd8fbc
9 changed files with 848 additions and 20 deletions

View File

@@ -12,15 +12,6 @@ option(PANDORA "Set to ON if targeting an OpenPandora device" ${PANDORA})
option(BCMHOST "Set to ON if targeting an RPi(2) device" ${BCMHOST})
option(ODROID "Set to ON if targeting an ODroid device" ${ODROID})
# At least one platform must be selected; fall back to PANDORA when none was requested.
if(NOT PANDORA AND NOT BCMHOST AND NOT ODROID)
    set(PANDORA ON CACHE BOOL "Set to ON if targeting an OpenPandora device" FORCE)
endif()
# Raspberry PI
if(BCMHOST)
include_directories(/opt/vc/include /opt/vc/include/interface/vcos/pthreads /opt/vc/include/interface/vmcs_host/linux)

View File

@@ -13,7 +13,6 @@ Some know limitations:
* NPOT texture are supported, but not with GL_REPEAT / GL_MIRRORED, only GL_CLAMP will work properly
* Framebuffer use FRAMEBUFFER_OES extension (that must be present in the GLES 1.1 stack)
* Multiple Color attachment on Framebuffer are not supported
* Internaly, Texture coordinates are 2D, Vertex are 3D only (no real support for W for vertex, or 3D texture / cubemaps)
* Probably many other things
----
@@ -56,3 +55,109 @@ Installation
----
Put lib/libGL.so.1 in your `LD_LIBRARY_PATH`.
----
Usage
----
There are many environment variables to control glshim behaviour. All are numeric, except LIBGL_VERSION, which takes a string.
##### LIBGL_FB
Controls the Framebuffer output
* 0 : Default, using standard x11 rendering
* 1 : Use Framebuffer output (x11 bypassed, only fullscreen)
* 2 : Use Framebuffer, but also an intermediary FBO
##### LIBGL_XREFRESH
Debug helper in specific cases
* 0 : Default, nothing special
* 1 : xrefresh will be called on cleanup
##### LIBGL_STACKTRACE
Automatic Backtrace log
* 0 : Default, nothing special
* 1 : stacktrace will be printed on crash
##### LIBGL_VSYNC
VSync control
* 0 : Default, nothing special
* 1 : vsync enabled
##### LIBGL_RECYCLEFBO
Recycling FBO special case (don't delete a created FBO, but recycle it if needed)
* 0 : Default, nothing special
* 1 : Recycling of FBO enabled
##### LIBGL_MIPMAP
Handling of Manual and Automatic MIPMAP
* 0 : Default, nothing special
* 1 : AutoMipMap forced
* 2 : guess AutoMipMap (based on manual mipmaping on textures)
* 3 : ignore MipMap (mipmap creation / use entirely disabled)
* 4 : ignore AutoMipMap on non-squared textures
##### LIBGL_TEXCOPY
Make a local copy of every texture for easy glGetTexImage2D
* 0 : Default, nothing special
* 1 : Texture copy enabled
##### LIBGL_SHRINK
Texture shrinking control
* 0 : Default, nothing special
* 1 : everything / 2 (using original algorithm for size reduction, all other shrink modes use a refined algorithm)
* 2 : only textures with one size > 512 are / 2
* 3 : only textures with one size > 256 are / 2
* 4 : only textures with one size > 256 are / 2, and the ones > 1024 are / 4
* 5 : only textures with one size > 256 are resized to 256 (if possible, because only /2 and /4 exist), but empty textures are not shrunk
* 6 : only textures with one size > 128 are / 2, those >= 512 are resized to 256 (if possible, because only /2 and /4 exist), but empty textures are not shrunk
* 7 : only textures with one size > 512 are / 2, but empty textures are not shrunk
* 8 : advertise a max texture size of 8192, but every texture with one size > 2048 is shrunk to 2048
* 9 : advertise a max texture size of 8192, but every texture with one size > 4096 is / 4 and the ones > 512 are / 2, but empty textures are not shrunk
* 10: advertise a max texture size of 8192, but every texture with one size > 2048 is / 4 and the ones > 512 are / 2, but empty textures are not shrunk
##### LIBGL_TEXDUMP
Texture dump
* 0 : Default, nothing special
* 1 : Texture dump enabled
##### LIBGL_ALPHAHACK
Experimental: enable Alpha test only when using texture that contains an alpha channel
* 0 : Default, nothing special
* 1 : Alpha Hack enabled
##### LIBGL_STREAM
PANDORA only: enable Texture Streaming (works only on RGB textures)
* 0 : Default, nothing special
* 1 : Enabled on empty RGB textures
* 2 : Enabled on all RGB textures
##### LIBGL_COPY
Control the glCopyTex(Sub)Image2D hack (they are buggy on pandora and don't work most of the time)
* 0 : Don't use native glCopyTex(Sub)Image2D, but a workaround function using FBO
* 1 : No glCopyTexImage2D / glCopyTexSubImage2D hack, use native ones
##### LIBGL_NOLUMALPHA
Control the availability of the LUMINANCE_ALPHA format (can be buggy on Pandora model CC)
* 0 : Default, GL_LUMINANCE_ALPHA is available and used if needed
* 1 : GL_LUMINANCE_ALPHA hardware support disabled (a GL_RGBA texture will be used instead)
##### LIBGL_BLENDHACK
Experimental: Change Blend GL_SRC_ALPHA, GL_ONE to GL_ONE, GL_ONE
* 0 : Default, nothing special
* 1 : Change Blend GL_SRC_ALPHA, GL_ONE to GL_ONE, GL_ONE (can be useful for Xash3D engine)
##### LIBGL_VERSION
Control the glGetString version. Override the version string (should be in the form of "1.x")
##### LIBGL_BATCH
Experimental: Batch mode (fusing of contiguous Display lists, to limit Draw calls)
* 0 : Default, only Draw lists created during a glList are fused (i.e. contiguous glBegin / glEnd inside a List)
* 1 : Force a maximum of calls to be batched (as if everything was inside one big glList)
* 2 : Disable Batch mode completely, no fusing of draw lists
##### LIBGL_NOERROR
Hack: glGetError() always returns GL_NO_ERROR
* 0 : Default, glGetError behaves as it should
* 1 : glGetError never fails.

View File

@@ -156,6 +156,14 @@
#define GL_RGB16 0x8054
#define GL_RGBA4 0x8056
#define GL_RGB5_A1 0x8057
#define GL_COMPRESSED_ALPHA 0x84E9
#define GL_COMPRESSED_LUMINANCE 0x84EA
#define GL_COMPRESSED_LUMINANCE_ALPHA 0x84EB
#define GL_COMPRESSED_INTENSITY 0x84EC
#define GL_COMPRESSED_RGB 0x84ED
#define GL_COMPRESSED_RGBA 0x84EE
#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3
// types
#define GL_BYTE 0x1400
#define GL_UNSIGNED_BYTE 0x1401

View File

@@ -51,6 +51,11 @@ const char* PrintEnum(GLenum what) {
p(GL_LUMINANCE8);
p(GL_LUMINANCE16);
p(GL_ALPHA8);
p(GL_COMPRESSED_ALPHA);
p(GL_COMPRESSED_LUMINANCE);
p(GL_COMPRESSED_LUMINANCE_ALPHA);
p(GL_COMPRESSED_RGB);
p(GL_COMPRESSED_RGBA);
// type
p(GL_UNSIGNED_BYTE);
p(GL_UNSIGNED_BYTE_2_3_3_REV);

View File

@@ -313,4 +313,12 @@ void DecompressBlockDXT3(uint32_t x, uint32_t y, uint32_t width,
DecompressBlockDXT1Internal (blockStorage,
image + x + (y * width), width, alphaValues);
}
}
// Texture DXT1 / DXT5 compression
// Using the STB "single-file" library
// go there https://github.com/nothings/stb
// for more details and other libs
#define STB_DXT_IMPLEMENTATION
#include "stb_dxt_104.h"

View File

@@ -32,6 +32,7 @@ GLuint gl_mergelist = 1;
int blendhack = 0;
char gl_version[50];
int initialized = 0;
int noerror = 0;
__attribute__((constructor))
void initialize_glshim() {
@@ -1727,6 +1728,8 @@ void glPopMatrix() __attribute__((alias("glshim_glPopMatrix")));
GLenum glshim_glGetError() {
LOAD_GLES(glGetError);
if(noerror)
return GL_NO_ERROR;
if (glstate.shim_error) {
GLenum tmp = glstate.last_error;
glstate.last_error = GL_NO_ERROR;

View File

@@ -0,0 +1,624 @@
// stb_dxt.h - v1.04 - DXT1/DXT5 compressor - public domain
// original by fabian "ryg" giesen - ported to C by stb
// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
//
// USAGE:
// call stb_compress_dxt_block() for every block (you must pad)
// source should be a 4x4 block of RGBA data in row-major order;
// A is ignored if you specify alpha=0; you can turn on dithering
// and "high quality" using mode.
//
// version history:
// v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
// single color match fix (allow for inexact color interpolation);
// optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
// v1.03 - (stb) endianness support
// v1.02 - (stb) fix alpha encoding bug
// v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
// v1.00 - (stb) first release
#ifndef STB_INCLUDE_STB_DXT_H
#define STB_INCLUDE_STB_DXT_H
// compression mode (bitflags)
#define STB_DXT_NORMAL 0
#define STB_DXT_DITHER 1 // use dithering. dubious win. never use for normal maps and the like!
#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode);
#define STB_COMPRESS_DXT_BLOCK
#ifdef STB_DXT_IMPLEMENTATION
// configuration options for DXT encoder. set them in the project/makefile or just define
// them at the top.
// STB_DXT_USE_ROUNDING_BIAS
// use a rounding bias during color interpolation. this is closer to what "ideal"
// interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
// implicitly had this turned on.
//
// in case you're targeting a specific type of hardware (e.g. console programmers):
// NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
// to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
// you also see "(a*5 + b*3) / 8" on some old GPU designs.
// #define STB_DXT_USE_ROUNDING_BIAS
#include <stdlib.h>
#include <math.h>
#include <string.h> // memset
static unsigned char stb__Expand5[32];
static unsigned char stb__Expand6[64];
static unsigned char stb__OMatch5[256][2];
static unsigned char stb__OMatch6[256][2];
static unsigned char stb__QuantRBTab[256+16];
static unsigned char stb__QuantGTab[256+16];
// Fixed-point multiply: returns a*b/255 rounded to nearest, computed with
// shifts only (add 128, then fold the high byte back in before the final >>8).
static int stb__Mul8Bit(int a, int b)
{
   const int biased = a*b + 128;
   const int folded = biased + (biased >> 8);
   return folded >> 8;
}
// Unpack a 5:6:5 packed color `v` into four bytes at `out`:
// out[0..2] receive the expanded R, G, B values (via the stb__Expand5 /
// stb__Expand6 lookup tables) and out[3] is set to 0.
static void stb__From16Bit(unsigned char *out, unsigned short v)
{
   const int red5   = (v >> 11) & 0x1f;
   const int green6 = (v >> 5) & 0x3f;
   const int blue5  = v & 0x1f;

   out[0] = stb__Expand5[red5];
   out[1] = stb__Expand6[green6];
   out[2] = stb__Expand5[blue5];
   out[3] = 0;
}
// Pack 8-bit r, g, b into a 5:6:5 value; each channel is scaled with
// stb__Mul8Bit (to 0..31 for red/blue, 0..63 for green) before packing.
static unsigned short stb__As16Bit(int r, int g, int b)
{
   const int red5   = stb__Mul8Bit(r, 31);
   const int green6 = stb__Mul8Bit(g, 63);
   const int blue5  = stb__Mul8Bit(b, 31);

   return (red5 << 11) + (green6 << 5) + blue5;
}
// Interpolate one third of the way from a towards b.
// The default build uses the exact integer form (2a + b) / 3; defining
// STB_DXT_USE_ROUNDING_BIAS selects the biased multiply-based variant instead.
static int stb__Lerp13(int a, int b)
{
#ifndef STB_DXT_USE_ROUNDING_BIAS
   // no rounding bias
   // ("/ 3" may be replaced by "* 0xaaab) >> 17" if the division is too slow)
   return (a + a + b) / 3;
#else
   // rounding-bias variant
   return a + stb__Mul8Bit(b-a, 0x55);
#endif
}
// Apply stb__Lerp13 to the first three bytes (R, G, B) of p1 and p2 and
// store the results in out[0..2]. out[3] is left untouched.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int channel;
   for (channel = 0; channel < 3; ++channel)
      out[channel] = stb__Lerp13(p1[channel], p2[channel]);
}
/****************************************************************************/
// Build the "optimal single color" table.
// For every 8-bit target value 0..255, search all (mx, mn) pairs of quantized
// endpoint levels (0..size-1, expanded through `expand`) and remember the pair
// whose 1/3 interpolation lands closest to the target.
// Table receives 256 entries of two bytes each: [2*i] = mx, [2*i+1] = mn.
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;

   for (target = 0; target < 256; ++target) {
      unsigned char *entry = Table + target*2;
      int lowestErr = 256;
      int lo, hi;

      for (lo = 0; lo < size; ++lo) {
         const int loVal = expand[lo];

         for (hi = 0; hi < size; ++hi) {
            const int hiVal = expand[hi];
            int cost = abs(stb__Lerp13(hiVal, loVal) - target);

            // DX10 spec says that interpolation must be within 3% of "correct" result,
            // so add that as an error term. (A random distribution of +-1.5% error
            // would be expected, but the spec never says the error must be unbiased -
            // better safe than sorry.)
            cost += abs(hiVal - loVal) * 3 / 100;

            if (cost >= lowestErr)
               continue;

            // strictly better candidate: first match wins on ties
            entry[0] = hi;
            entry[1] = lo;
            lowestErr = cost;
         }
      }
   }
}
// Fill a 4-entry palette (4 bytes per entry) at `color`:
//   entry 0 = c0 expanded, entry 1 = c1 expanded,
//   entry 2 = 1/3 of the way from entry 0 to entry 1,
//   entry 3 = 1/3 of the way from entry 1 to entry 0.
// Only the RGB bytes of entries 2 and 3 are written.
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
{
   unsigned char *first  = color;
   unsigned char *second = color + 4;

   stb__From16Bit(first,  c0);
   stb__From16Bit(second, c1);
   stb__Lerp13RGB(color + 8,  first,  second);
   stb__Lerp13RGB(color + 12, second, first);
}
// Block dithering function. Simply dithers a block to 565 RGB.
// (Floyd-Steinberg)
//
// block: source pixels, 4 bytes per pixel, 4 pixels per row, 4 rows.
// dest:  receives the quantized R, G, B bytes in the same layout; the fourth
//        byte of every pixel in dest is never written by this function.
static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
{
// err holds two rows of 4 error terms; ep1 = row being produced, ep2 = previous row
int err[8],*ep1 = err,*ep2 = err+4, *et;
int ch,y;
// process channels separately
for (ch=0; ch<3; ++ch) {
unsigned char *bp = block+ch, *dp = dest+ch;
// green uses the 6-bit table, red/blue the 5-bit one; the +8 offset lets the
// index (pixel value plus diffused error) dip slightly below zero
unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
memset(err, 0, sizeof(err));
for(y=0; y<4; ++y) {
// each pixel: quantize (value + weighted neighbour errors / 16), then record
// the new error (source value minus quantized value) for the pixels that follow
dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
ep1[0] = bp[ 0] - dp[ 0];
dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
ep1[1] = bp[ 4] - dp[ 4];
dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
ep1[2] = bp[ 8] - dp[ 8];
dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
ep1[3] = bp[12] - dp[12];
// advance to the next row of 4 pixels (16 bytes)
bp += 16;
dp += 16;
et = ep1, ep1 = ep2, ep2 = et; // swap
}
}
}
// The color matching function
//
// block:  16 pixels, 4 bytes each (only the first three bytes of a pixel are read).
// color:  4-entry palette, 4 bytes per entry (entries 0/1 are used to derive the
//         projection direction; all four are projected to get the "stops").
// dither: non-zero selects the error-diffusing variant.
// Returns a 32-bit mask holding one 2-bit palette index per pixel, pixel 0 in
// the lowest two bits.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
unsigned int mask = 0;
// direction vector from palette entry 1 to palette entry 0
int dirr = color[0*4+0] - color[1*4+0];
int dirg = color[0*4+1] - color[1*4+1];
int dirb = color[0*4+2] - color[1*4+2];
int dots[16];
int stops[4];
int i;
int c0Point, halfPoint, c3Point;
// project every pixel and every palette entry onto that direction
for(i=0;i<16;i++)
dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;
for(i=0;i<4;i++)
stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;
// think of the colors as arranged on a line; project point onto that line, then choose
// next color out of available ones. we compute the crossover points for "best color in top
// half"/"best in bottom half" and then the same inside that subinterval.
//
// relying on this 1d approximation isn't always optimal in terms of euclidean distance,
// but it's very close and a lot faster.
// http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
// crossover thresholds: midpoints between neighbouring stops
c0Point = (stops[1] + stops[3]) >> 1;
halfPoint = (stops[3] + stops[2]) >> 1;
c3Point = (stops[2] + stops[0]) >> 1;
if(!dither) {
// the version without dithering is straightforward
// walk pixels 15..0 so that, after the shifts, pixel 0 sits in the low bits
for (i=15;i>=0;i--) {
int dot = dots[i];
mask <<= 2;
if(dot < halfPoint)
mask |= (dot < c0Point) ? 1 : 3;
else
mask |= (dot < c3Point) ? 2 : 0;
}
} else {
// with floyd-steinberg dithering
// err = two rows of error terms; ep1 = current row, ep2 = previous row
int err[8],*ep1 = err,*ep2 = err+4;
int *dp = dots, y;
// thresholds are scaled by 16 to match the (dot << 4) + weighted-error sums below
c0Point <<= 4;
halfPoint <<= 4;
c3Point <<= 4;
for(i=0;i<8;i++)
err[i] = 0;
for(y=0;y<4;y++)
{
int dot,lmask,step;
// pixel 0 of the row
dot = (dp[0] << 4) + (3*ep2[1] + 5*ep2[0]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[0] = dp[0] - stops[step];
lmask = step;
// pixel 1 of the row
dot = (dp[1] << 4) + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[1] = dp[1] - stops[step];
lmask |= step<<2;
// pixel 2 of the row
dot = (dp[2] << 4) + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[2] = dp[2] - stops[step];
lmask |= step<<4;
// pixel 3 of the row
dot = (dp[3] << 4) + (7*ep1[2] + 5*ep2[3] + ep2[2]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[3] = dp[3] - stops[step];
lmask |= step<<6;
dp += 4;
// each row contributes 8 bits (4 pixels x 2 bits) to the result
mask |= lmask << (y*8);
{ int *et = ep1; ep1 = ep2; ep2 = et; } // swap
}
}
return mask;
}
// The color optimization function. (Clever code, part 1)
//
// Chooses two endpoint colors for a 16-pixel block (4 bytes per pixel, only
// R, G, B are read): estimates the principal axis of the color distribution,
// then takes the pixels with the largest and smallest projection onto it.
// The two picks are converted with stb__As16Bit and stored in *pmax16 / *pmin16.
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
int mind = 0x7fffffff,maxd = -0x7fffffff;
// minp/maxp are assigned inside the final loop before being read
unsigned char *minp, *maxp;
double magn;
int v_r,v_g,v_b;
// number of power-iteration steps
static const int nIterPower = 4;
float covf[6],vfr,vfg,vfb;
// determine color distribution
int cov[6];
int mu[3],min[3],max[3];
int ch,i,iter;
// per channel: rounded mean (sum of 16 values, +8, >>4), minimum and maximum
for(ch=0;ch<3;ch++)
{
const unsigned char *bp = ((const unsigned char *) block) + ch;
int muv,minv,maxv;
muv = minv = maxv = bp[0];
for(i=4;i<64;i+=4)
{
muv += bp[i];
if (bp[i] < minv) minv = bp[i];
else if (bp[i] > maxv) maxv = bp[i];
}
mu[ch] = (muv + 8) >> 4;
min[ch] = minv;
max[ch] = maxv;
}
// determine covariance matrix
// (symmetric 3x3, stored as its 6 unique entries: rr, rg, rb, gg, gb, bb)
for (i=0;i<6;i++)
cov[i] = 0;
for (i=0;i<16;i++)
{
int r = block[i*4+0] - mu[0];
int g = block[i*4+1] - mu[1];
int b = block[i*4+2] - mu[2];
cov[0] += r*r;
cov[1] += r*g;
cov[2] += r*b;
cov[3] += g*g;
cov[4] += g*b;
cov[5] += b*b;
}
// convert covariance matrix to float, find principal axis via power iter
for(i=0;i<6;i++)
covf[i] = cov[i] / 255.0f;
// starting vector for the iteration: the per-channel value range
vfr = (float) (max[0] - min[0]);
vfg = (float) (max[1] - min[1]);
vfb = (float) (max[2] - min[2]);
for(iter=0;iter<nIterPower;iter++)
{
float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];
vfr = r;
vfg = g;
vfb = b;
}
// magn = largest absolute component of the resulting vector
magn = fabs(vfr);
if (fabs(vfg) > magn) magn = fabs(vfg);
if (fabs(vfb) > magn) magn = fabs(vfb);
if(magn < 4.0f) { // too small, default to luminance
v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
v_g = 587;
v_b = 114;
} else {
// rescale so the largest component has magnitude 512, then truncate to int
magn = 512.0 / magn;
v_r = (int) (vfr * magn);
v_g = (int) (vfg * magn);
v_b = (int) (vfb * magn);
}
// Pick colors at extreme points
for(i=0;i<16;i++)
{
int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;
if (dot < mind) {
mind = dot;
minp = block+i*4;
}
if (dot > maxd) {
maxd = dot;
maxp = block+i*4;
}
}
*pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
*pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
}
// Truncate y to an int and clamp the result to the inclusive range [p0, p1].
static int stb__sclamp(float y, int p0, int p1)
{
   const int truncated = (int) y;
   return (truncated < p0) ? p0
        : (truncated > p1) ? p1
        : truncated;
}
// The refinement function. (Clever code, part 2)
// Tries to optimize colors to suit block contents better.
// (By solving a least squares system via normal equations+Cramer's rule)
//
// block:   16 pixels, 4 bytes each (R, G, B read).
// pmax16 / pmin16: in = current 5:6:5 endpoints, out = refined endpoints.
// mask:    current 2-bit-per-pixel index assignment (pixel 0 in the low bits).
// Returns non-zero when at least one endpoint changed.
static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
{
// per-index weight used for the first endpoint's accumulators (At1_*)
static const int w1Tab[4] = { 3,0,2,1 };
static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
// ^some magic to save a lot of multiplies in the accumulating loop...
// (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
float frb,fg;
unsigned short oldMin, oldMax, min16, max16;
int i, akku = 0, xx,xy,yy;
int At1_r,At1_g,At1_b;
int At2_r,At2_g,At2_b;
unsigned int cm = mask;
oldMin = *pmin16;
oldMax = *pmax16;
// mask ^ (mask<<2) has no bits set above the lowest two only when every
// 2-bit index equals the one before it
if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
{
// yes, linear system would be singular; solve using optimal
// single-color match on average color
// (accumulators start at 8 so the >>4 below rounds the 16-pixel average)
int r = 8, g = 8, b = 8;
for (i=0;i<16;++i) {
r += block[i*4+0];
g += block[i*4+1];
b += block[i*4+2];
}
r >>= 4; g >>= 4; b >>= 4;
max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
} else {
At1_r = At1_g = At1_b = 0;
At2_r = At2_g = At2_b = 0;
// consume the mask two bits at a time, lowest pixel first
for (i=0;i<16;++i,cm>>=2) {
int step = cm&3;
int w1 = w1Tab[step];
int r = block[i*4+0];
int g = block[i*4+1];
int b = block[i*4+2];
akku += prods[step];
At1_r += w1*r;
At1_g += w1*g;
At1_b += w1*b;
At2_r += r;
At2_g += g;
At2_b += b;
}
// At2 so far holds plain sums; turn it into the complementary-weight sums
At2_r = 3*At2_r - At1_r;
At2_g = 3*At2_g - At1_g;
At2_b = 3*At2_b - At1_b;
// extract solutions and decide solvability
// (akku packs three sums: bits 16+ = xx, bits 8..15 = yy, bits 0..7 = xy)
xx = akku >> 16;
yy = (akku >> 8) & 0xff;
xy = (akku >> 0) & 0xff;
// NOTE(review): the divisor is the determinant xx*yy - xy*xy; this relies on the
// same-index check above to exclude the singular case - confirm it cannot be 0 here
frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
fg = frb * 63.0f / 31.0f;
// solve.
max16 = stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11;
max16 |= stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5;
max16 |= stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0;
min16 = stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11;
min16 |= stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5;
min16 |= stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0;
}
*pmin16 = min16;
*pmax16 = max16;
return oldMin != min16 || oldMax != max16;
}
// Color block compression
//
// dest:  receives 8 bytes: the two 5:6:5 endpoints (little-endian, larger value
//        first) followed by the 32-bit index mask (little-endian).
// block: 16 source pixels, 4 bytes each. All four bytes of a pixel take part in
//        the "is the block constant?" comparison; only R, G, B are encoded.
// mode:  bit flags STB_DXT_DITHER / STB_DXT_HIGHQUAL.
static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
{
   unsigned int mask;
   int i;
   int dither;
   int refinecount;
   unsigned short max16, min16;
   unsigned char dblock[16*4],color[4*4];

   dither = mode & STB_DXT_DITHER;
   refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;

   // check if block is constant
   // (byte-wise compare of each 4-byte pixel with pixel 0: unlike reading the
   // buffer through an unsigned int pointer, this makes no alignment or
   // aliasing assumption about the caller's buffer)
   for (i=1;i<16;i++)
      if (memcmp(block + i*4, block, 4) != 0)
         break;

   if(i == 16) { // constant color
      int r = block[0], g = block[1], b = block[2];
      // every pixel uses index 2, the interpolated entry the OMatch tables are built for
      mask = 0xaaaaaaaa;
      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
   } else {
      // first step: compute dithered version for PCA if desired
      if(dither)
         stb__DitherBlock(dblock,block);

      // second step: pca+map along principal axis
      stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
      if (max16 != min16) {
         stb__EvalColors(color,max16,min16);
         mask = stb__MatchColorsBlock(block,color,dither);
      } else
         mask = 0;

      // third step: refine (multiple times if requested)
      for (i=0;i<refinecount;i++) {
         unsigned int lastmask = mask;

         if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
            if (max16 != min16) {
               stb__EvalColors(color,max16,min16);
               mask = stb__MatchColorsBlock(block,color,dither);
            } else {
               mask = 0;
               break;
            }
         }

         // stop early once refinement no longer changes the assignment
         if(mask == lastmask)
            break;
      }
   }

   // write the color block
   // (endpoints are stored larger-first; swapping them means indices 0<->1 and
   // 2<->3 trade places, which is what flipping the low bit of each index does)
   if(max16 < min16)
   {
      unsigned short t = min16;
      min16 = max16;
      max16 = t;
      mask ^= 0x55555555;
   }

   dest[0] = (unsigned char) (max16);
   dest[1] = (unsigned char) (max16 >> 8);
   dest[2] = (unsigned char) (min16);
   dest[3] = (unsigned char) (min16 >> 8);
   dest[4] = (unsigned char) (mask);
   dest[5] = (unsigned char) (mask >> 8);
   dest[6] = (unsigned char) (mask >> 16);
   dest[7] = (unsigned char) (mask >> 24);
}
// Alpha block compression (this is easy for a change)
//
// src:  16 pixels, 4 bytes each; only byte 3 (alpha) of every pixel is read.
// dest: receives 8 bytes: max alpha, min alpha, then 16 three-bit indices
//       packed starting from the least significant bit.
// mode: not used by this function.
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src,int mode)
{
   int px;
   int lo, hi;
   int range, range2, range4, offset;
   int acc = 0, nbits = 0;

   // find the smallest and largest alpha in the block
   lo = hi = src[3];
   for (px = 1; px < 16; ++px) {
      const int a = src[px*4 + 3];
      if (a < lo) lo = a;
      else if (a > hi) hi = a;
   }

   // the two endpoints go first
   dest[0] = (unsigned char) hi;
   dest[1] = (unsigned char) lo;
   dest += 2;

   // determine bias and emit indices
   // given the choice of hi/lo, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range  = hi - lo;
   range4 = range * 4;
   range2 = range * 2;
   offset = ((range < 8) ? (range - 1) : (range/2 + 2)) - lo * 7;

   for (px = 0; px < 16; ++px) {
      int scaled = src[px*4 + 3]*7 + offset;
      int level = 0;

      // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max).
      if (scaled >= range4) { level = 4;  scaled -= range4; }
      if (scaled >= range2) { level += 2; scaled -= range2; }
      if (scaled >= range)  { level += 1; }

      // turn linear scale into DXT index (0/1 are extremal pts)
      level = -level & 7;
      level ^= (2 > level);

      // append the 3-bit index; flush a byte whenever 8 or more bits are pending
      acc |= level << nbits;
      nbits += 3;
      if (nbits >= 8) {
         *dest++ = (unsigned char) acc;
         acc >>= 8;
         nbits -= 8;
      }
   }
}
// Fill every lookup table used by the encoder:
//  - stb__Expand5 / stb__Expand6: 5-bit and 6-bit levels widened to 8 bits by
//    replicating the top bits into the low bits;
//  - stb__QuantRBTab / stb__QuantGTab: 8-bit value -> nearest expanded level,
//    indexed with an offset of 8 (entries for i-8 outside 0..255 are clamped);
//  - stb__OMatch5 / stb__OMatch6: single-color endpoint pairs.
static void stb__InitDXT()
{
   int n;

   for (n = 0; n < 32; ++n)
      stb__Expand5[n] = (n<<3)|(n>>2);

   for (n = 0; n < 64; ++n)
      stb__Expand6[n] = (n<<2)|(n>>4);

   for (n = 0; n < 256+16; ++n) {
      int level = n - 8;
      if (level < 0)
         level = 0;
      else if (level > 255)
         level = 255;

      stb__QuantRBTab[n] = stb__Expand5[stb__Mul8Bit(level,31)];
      stb__QuantGTab[n] = stb__Expand6[stb__Mul8Bit(level,63)];
   }

   stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
   stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
}
// Compress one 4x4 block.
//
// dest:  output; 8 bytes when alpha == 0 (color block only), 16 bytes when
//        alpha != 0 (8-byte alpha block followed by the 8-byte color block).
// src:   16 RGBA pixels in row-major order, 4 bytes per pixel.
// alpha: non-zero to also encode the alpha channel.
// mode:  STB_DXT_NORMAL, or a combination of STB_DXT_DITHER / STB_DXT_HIGHQUAL.
//
// The lookup tables are built on the first call (guarded by a plain static
// flag).
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   // scratch copy of the block, declared as unsigned int so that it is suitably
   // aligned for the word-wise access done by the color encoder
   unsigned int opaque_words[16];
   unsigned char *opaque = (unsigned char *) opaque_words;
   static int init=1;

   if (init) {
      stb__InitDXT();
      init=0;
   }

   if (alpha) {
      int i;
      stb__CompressAlphaBlock(dest,(unsigned char*) src,mode);
      dest += 8;
      // The color encoder detects constant-color blocks by comparing whole
      // 4-byte pixels. Alpha has just been encoded separately, so give the
      // color pass a copy with alpha forced to 255: a block whose RGB is
      // constant but whose alpha varies then still takes the single-color path.
      memcpy(opaque, src, 16*4);
      for (i = 0; i < 16; ++i)
         opaque[i*4 + 3] = 255;
      src = opaque;
   }

   stb__CompressColorBlock(dest,(unsigned char*) src,mode);
}
#endif // STB_DXT_IMPLEMENTATION
#endif // STB_INCLUDE_STB_DXT_H

View File

@@ -2,6 +2,7 @@
#include "raster.h"
#include "decompress.h"
#include "debug.h"
#include "stb_dxt_104.h"
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include "gles.h"
@@ -243,7 +244,7 @@ static void *swizzle_texture(GLsizei width, GLsizei height,
}
GLenum swizzle_internalformat(GLenum *internalformat) {
GLenum ret;
GLenum ret = *internalformat;
GLenum sret;
switch(*internalformat) {
case GL_R:
@@ -299,6 +300,26 @@ GLenum swizzle_internalformat(GLenum *internalformat) {
else
sret = GL_LUMINANCE_ALPHA;
break;
// compressed format...
case GL_COMPRESSED_ALPHA:
sret = GL_ALPHA;
break;
case GL_COMPRESSED_LUMINANCE:
sret = GL_LUMINANCE;
break;
case GL_COMPRESSED_LUMINANCE_ALPHA:
if (nolumalpha)
sret = GL_RGBA;
else
sret = GL_LUMINANCE_ALPHA;
break;
case GL_COMPRESSED_RGB:
sret = GL_RGB;
break;
case GL_COMPRESSED_RGBA:
sret = GL_RGBA;
break;
default:
ret = GL_RGBA;
sret = GL_RGBA;
@@ -1314,8 +1335,15 @@ void glshim_glGetTexLevelParameteriv(GLenum target, GLint level, GLenum pname, G
case GL_TEXTURE_INTERNAL_FORMAT:
if (bound && bound->compressed)
(*params) = bound->format;
else
(*params) = GL_RGBA;
else {
if(bound && ((bound->orig_internal==GL_COMPRESSED_RGB) || (bound->orig_internal==GL_COMPRESSED_RGBA))) {
if(bound->orig_internal==GL_COMPRESSED_RGB)
*(params) = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
else
*(params) = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
} else
(*params) = GL_RGBA;
}
break;
case GL_TEXTURE_DEPTH:
(*params) = 0;
@@ -1339,11 +1367,24 @@ void glshim_glGetTexLevelParameteriv(GLenum target, GLint level, GLenum pname, G
case GL_TEXTURE_COMPRESSED:
if (bound && bound->compressed)
(*params) = GL_TRUE;
else
(*params) = GL_FALSE;
else {
if(bound && ((bound->orig_internal==GL_COMPRESSED_RGB) || (bound->orig_internal==GL_COMPRESSED_RGBA)))
(*params) = GL_TRUE;
else
(*params) = GL_FALSE;
}
break;
case GL_TEXTURE_COMPRESSED_IMAGE_SIZE:
(*params) = (bound)?(bound->width*bound->height*4):0;
if(bound && ((bound->orig_internal==GL_COMPRESSED_RGB) || (bound->orig_internal==GL_COMPRESSED_RGBA))) {
int w = bound->width>>level;
int h = bound->height>>level;
w = ((w>>2)+1) << 2; h = ((h>>2)+1) << 2; //DXT works on 4x4 blocks...
if (bound->orig_internal==GL_COMPRESSED_RGB) //DXT1, 64bits (i.e. size=8) for a 4x4 block
(*params) = (w*h)/2;
else //DXT5, 64+64 (i.e. size = 16) for a 4x4 block
(*params) = w*h;
} else
(*params) = (bound)?(bound->width*bound->height*4):0;
break;
default:
errorShim(GL_INVALID_ENUM); //Wrong here...
@@ -1820,9 +1861,49 @@ void glshim_glCompressedTexSubImage2D(GLenum target, GLint level, GLint xoffset,
// Return the currently bound texture re-encoded as DXT1 (GL_COMPRESSED_RGB)
// or DXT5 (GL_COMPRESSED_RGBA).
//
// The texture is read back as RGBA bytes through glshim_glGetTexImage, then
// compressed 4x4 block by 4x4 block with stb_compress_dxt_block into `img`
// (offset by the pack buffer's data pointer when a pack buffer is bound).
// Nothing is written, and GL_INVALID_OPERATION is left set, when no texture is
// bound, when the texture was not created with one of the two generic
// compressed formats, or when the temporary read-back buffer cannot be allocated.
void glshim_glGetCompressedTexImage(GLenum target, GLint lod, GLvoid *img) {
    if (glstate.gl_batch) flush();

    gltexture_t* bound = glstate.texture.bound[glstate.texture.active];
    // pessimistic default: every early return below reports this error
    errorShim(GL_INVALID_OPERATION);
    if(!bound)
        return;
    if(bound->orig_internal!=GL_COMPRESSED_RGB && bound->orig_internal!=GL_COMPRESSED_RGBA)
        return;

    // size of the requested level; a mip level is never smaller than 1x1
    int width = bound->width>>lod;
    int height = bound->height>>lod;
    if (width < 1) width = 1;
    if (height < 1) height = 1;
    // DXT works on 4x4 blocks: round up to the next multiple of 4, leaving
    // sizes that already are a multiple of 4 unchanged
    int w = (width+3) & ~3;
    int h = (height+3) & ~3;

    int alpha = (bound->orig_internal==GL_COMPRESSED_RGBA)?1:0;

    // zero-initialised scratch buffer for the uncompressed RGBA image
    GLuint *src = (GLuint*)calloc((size_t)width*(size_t)height, sizeof(GLuint));
    if (!src)
        return;

    // temporarily unbind the pixel buffers so the read-back goes to `src` itself
    glbuffer_t *unpack = glstate.vao->unpack;
    glbuffer_t *pack = glstate.vao->pack;
    glstate.vao->unpack = NULL;
    glstate.vao->pack = NULL;

    // byte pointer: the output cursor advances by 8 or 16 bytes per block
    unsigned char *datab = (unsigned char*)img;
    if (pack)
        datab += (uintptr_t)pack->data;

    // grab the uncompressed image
    glshim_glGetTexImage(target, lod, GL_RGBA, GL_UNSIGNED_BYTE, (GLvoid*)src);

    GLuint tmp[4*4]; //this is the 4x4 block
    for (int y = 0; y < h; y+=4)
        for (int x = 0; x < w; x+=4) {
            // texels outside the image repeat the last texel gathered for this block
            // (0 if none was gathered yet)
            GLuint col = 0;
            for (int i=0; i<16; i++) {
                if(x+(i%4)<width && y+(i/4)<height)
                    col = src[x+(i%4)+(y+(i/4))*width];
                tmp[i] = col;
            }
            stb_compress_dxt_block(datab, (const unsigned char*)tmp, alpha, STB_DXT_NORMAL);
            // 8 bytes per DXT1 block, 16 per DXT5 block
            datab+=8*(alpha+1);
        }

    free(src);

    glstate.vao->unpack = unpack;
    glstate.vao->pack = pack;

    noerrorShim();
    return;
}

View File

@@ -167,6 +167,7 @@ extern int texstream;
extern int copytex;
extern int nolumalpha;
extern int blendhack;
extern int noerror;
extern char gl_version[50];
bool g_recyclefbo = false;
@@ -429,7 +430,8 @@ static void scan_env() {
}
env(LIBGL_BLENDHACK, blendhack, "Change Blend GL_SRC_ALPHA, GL_ONE to GL_ONE, GL_ONE");
env(LIBGL_NOERROR, noerror, "glGetError() always return GL_NOERROR");
char *env_version = getenv("LIBGL_VERSION");
if (env_version) {
printf("LIBGL: Overide version string with \"%s\" (should be in the form of \"1.x\")\n", gl_version);
@@ -824,6 +826,7 @@ const char *glXQueryExtensionsString(Display *display, int screen) {
"GLX_ARB_create_context "
"GLX_ARB_create_context_profile "
"GLX_EXT_create_context_es2_profile "
"GLX_ARB_get_proc_address "
};
return extensions;
}