//Note: this code does not claim to be bit-correct. //It doesn't support volume textures. //It doesn't validate block extents (and is generally unaware of more than one block anyway) //It doesn't implement all validation checks, either. //Do NOT use this code to validate any encoders... //Based upon documentation here: https://www.khronos.org/registry/OpenGL/extensions/OES/OES_texture_compression_astc.txt #ifndef ASTC_PUBLIC #define ASTC_PUBLIC #endif #define ASTC_WITH_LDR //comment out this line to disable pure-LDR decoding (the hdr code can still be used). #define ASTC_WITH_HDR //comment out this line to disable HDR decoding. #define ASTC_WITH_HDRTEST //comment out this line to disable checking for which profile is needed. //#define ASTC_WITH_3D #ifdef ASTC_WITH_LDR ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd); //generates RGBA8 data (gives error colour for hdr blocks!) #endif #ifdef ASTC_WITH_HDR ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd); //generates RGBA16F data. #endif #ifdef ASTC_WITH_HDRTEST ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd); //returns true if n consecutive blocks require the HDR profile (ie: detects when you need to soft-decode for drivers with partial support, as opposed to just always decompressing). #endif #include #include #include #ifndef Vector4Set #define Vector4Set(r,x,y,z,w) {(r)[0] = x; (r)[1] = y;(r)[2] = z;(r)[3]=w;} #endif #ifndef countof #define countof(array) (sizeof(array)/sizeof(array[0])) #endif #if defined(ASTC_WITH_LDR) || defined(ASTC_WITH_HDR) #define ASTC_WITH_DECODE #endif enum astc_status_e { //valid blocks ASTC_OKAY, //we can decode at least part of this normally (hdr endpoints may still result in per-endpoint errors). ASTC_VOID_LDR, //not an error - the block is a single LDR colour, with an RGBA16 colour in the last 8 bytes. ASTC_VOID_HDR, //not an error - the block is a single HDR colour, with an RGBA16F colour in the last 8 bytes. //invalid blocks ASTC_ERROR, //validation errors ASTC_UNSUPPORTED, //basically just volume textures ASTC_RESERVED, //reserved bits. basically an error but might not be in the future. }; struct astc_block_info { unsigned char *in; //the 16 bytes of the block unsigned char blocksize[3]; //block width, height, depth(1 for 2d). enum astc_status_e status; //block status/type. unsigned char dualplane; //two sets of weights instead of one. unsigned char ccs; //second set applies to this component unsigned char precision; //defines the precision of the weights int wcount[4]; //x,y,z,total weight counts int weight_bits; //size of weights section. int config_bits; //size of header before the endpoint bits int ep_bits; //size available to endpoints unsigned char weights[64]; //official limit to the number of weights stored unsigned char partitions; //number of active partitions to select from (and number of endpoints to read) unsigned short partindex; //used for deciding which partition each pixel belongs in struct astc_part { unsigned char mode; //endpoint modes #ifdef ASTC_WITH_HDR unsigned char hdr; //endpoint colour mode - &1=rgb, &2=alpha #endif int ep[2][4]; } part[4]; }; static unsigned char ASTC_readbits(unsigned char *in, unsigned int offset, unsigned int count) { //only reads up to 9 bits, because offset 7 with 10 bits needs to read more than two bytes unsigned short s; in += offset>>3; offset &= 7; s = in[0]; if (offset+count>8) s |= (in[1]<<8); s>>=offset; return s & ((1u< 8) { count -= 8; r |= ASTC_readbits(in, offset+count, 8)<32 is +1 (otherwise it would be 0-63) //high bits are folded over static unsigned char dequant_weight_1b[1<<1] = {0x00,0x40}; static unsigned char dequant_weight_2b[1<<2] = {0x00,0x15,0x2b,0x40}; static unsigned char dequant_weight_3b[1<<3] = {0x00,0x09,0x12,0x1b,0x25,0x2e,0x37,0x40}; static unsigned char dequant_weight_4b[1<<4] = {0x00,0x04,0x08,0x0c,0x11,0x15,0x19,0x1d,0x23,0x27,0x2b,0x2f,0x34,0x38,0x3c,0x40}; static unsigned char dequant_weight_5b[1<<5] = {0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,0x40}; static unsigned char dequant_weight_0t[3] = {0,32,64}; static unsigned char dequant_weight_1t[6] = {0x00,0x40,0x0c,0x34,0x19,0x27}; static unsigned char dequant_weight_2t[12] = {0x00,0x40,0x11,0x2f,0x06,0x3a,0x17,0x29,0x0c,0x34,0x1d,0x23}; static unsigned char dequant_weight_3t[24] = {0x00,0x40,0x08,0x38,0x10,0x30,0x18,0x28,0x02,0x3e,0x0b,0x35,0x13,0x2d,0x1b,0x25,0x05,0x3b,0x0d,0x33,0x16,0x2a,0x1e,0x22}; static unsigned char dequant_weight_0q[5] = {0,16,32,48,64}; static unsigned char dequant_weight_1q[10] = {0x00,0x40,0x05,0x3b,0x0b,0x35,0x11,0x2f,0x17,0x29}; static unsigned char dequant_weight_2q[20] = {0x00,0x40,0x10,0x30,0x03,0x3d,0x13,0x2d,0x06,0x3a,0x17,0x29,0x09,0x37,0x1a,0x26,0x0d,0x33,0x1d,0x23}; static const struct { unsigned char extra, bits, *dequant; } astc_weightmode[] = { {0,0, NULL}, //invalid {0,0, NULL}, //invalid {0,1, dequant_weight_1b}, //2 {1,0, dequant_weight_0t}, //3 {0,2, dequant_weight_2b}, //4 {2,0, dequant_weight_0q}, //5 {1,1, dequant_weight_1t}, //6 {0,3, dequant_weight_3b}, //8 {0,0, NULL}, //invalid {0,0, NULL}, //invalid {2,1, dequant_weight_1q}, //10 {1,2, dequant_weight_2t}, //12 {0,4, dequant_weight_4b}, //16 {2,2, dequant_weight_2q}, //20 {1,3, dequant_weight_3t}, //24 {0,5, dequant_weight_5b}, //32 }; static unsigned int ASTC_DecodeSize(unsigned int count, unsigned int bits, unsigned char extra) { return ((extra==1)?((count*8)+4)/5:0) + ((extra==2)?((count*7)+2)/3:0) + count*bits; } static void ASTC_ReadBlockMode(struct astc_block_info *b) { unsigned char *in = b->in; unsigned short s = ASTC_readmanybits(in, 0, 13);//in[0] | (in[1]<<8); b->config_bits = 13; if ((s&0x1ff)==0x1fc) { //void extent if (s&0x200) b->status = ASTC_VOID_HDR; else b->status = ASTC_VOID_LDR; b->dualplane = b->precision = b->wcount[0] = b->wcount[1] = b->wcount[2] = b->partitions = 0; return; } b->status = ASTC_OKAY; b->dualplane = (s>>10)&1; //Dp b->precision = (s>>(9-3))&(1<<3);//P b->precision |= (s>>4)&1; //p0 if (b->blocksize[2] != 1) { //3d blocks have a different header layout #ifdef ASTC_WITH_3D if (s&3) { b->precision|=(s&3)<<1; //p2, p1 b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>7)&3)+2, b->wcount[2] = ((s>>2)&3)+2; } else { b->precision|=(s&0xc)>>1; //p2, p1 if ((s&0x180)!=0x180) { b->dualplane = 0; //always single plane. b->precision &= 7; //clear the high precision bit (reused for 'b') if (!(s&0x180)) b->wcount[0] = 6, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = ((s>>5)&3)+2; else if (!(s&0x80)) b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = 6, b->wcount[2] = ((s>>9)&3)+2; else b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = 6; } else if ((s&0x60)!=0x60) { if (!(s&0x60)) b->wcount[0] = 6, b->wcount[1] = 2, b->wcount[2] = 2; else if (!(s&0x20)) b->wcount[0] = 2, b->wcount[1] = 6, b->wcount[2] = 2; else //40 b->wcount[0] = 2, b->wcount[1] = 2, b->wcount[2] = 6; } else b->status = ASTC_RESERVED; //reserved (or void extent, but those were handled above) } #else b->status = ASTC_UNSUPPORTED; #endif } else { b->wcount[2] = 1; if (s&3) { //one of the first 5 layouts... b->precision|=(s&3)<<1; //p2, p1 if (!(s&8)) { //first two layouts... if (!(s&4)) { //layout0 b->wcount[0] = ((s>>7)&3)+4; b->wcount[1] = ((s>>5)&3)+2; } else { //layout1 b->wcount[0] = ((s>>7)&3)+8; b->wcount[1] = ((s>>5)&3)+2; } } else if (!(s&4)) { //layout2 b->wcount[0] = ((s>>5)&3)+2; b->wcount[1] = ((s>>7)&3)+8; } else if (!(s&256)) { //layout3 b->wcount[0] = ((s>>5)&3)+2; b->wcount[1] = ((s>>7)&1)+6; } else { //layout4 b->wcount[0] = ((s>>7)&1)+2; b->wcount[1] = ((s>>5)&3)+2; } } else { //one of the later layouts b->precision|=(s&0xc)>>1; //p2, p1 if (!(s&384)) { b->wcount[0] = 12; b->wcount[1] = ((s>>5)&3)+2; } else if ((s&384)==128) { b->wcount[0] = ((s>>5)&3)+2; b->wcount[1] = 12; } else if ((s&480)==384) { b->wcount[0] = 6; b->wcount[1] = 10; } else if ((s&480)==416) { b->wcount[0] = 10; b->wcount[1] = 6; } else if ((s&384)==256) { b->wcount[0] = ((s>>5)&3)+6; b->wcount[1] = ((s>>9)&3)+6; b->dualplane = 0; //forget the Dp bit, its reused in this layout b->precision &= 7; //forget the P bit, too } else b->status = ASTC_RESERVED; //reserved } } b->partitions = ((s>>11)&3)+1; if (b->partitions > 3 && b->dualplane) b->status = ASTC_ERROR; //apparently. if (b->wcount[0] > b->blocksize[0] || b->wcount[1] > b->blocksize[1] || b->wcount[2] > b->blocksize[2]) b->status = ASTC_ERROR; //invalid weight counts. b->wcount[3] = b->wcount[0] * b->wcount[1] * b->wcount[2]; b->wcount[3]<<=b->dualplane; //dual-plane has twice the weights - interleaved. if (b->wcount[3] > countof(b->weights)) b->status = ASTC_ERROR; //more than 64 weights are banned, for some reason b->weight_bits = ASTC_DecodeSize(b->wcount[3], astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra); } static void ASTC_ReadPartitions(struct astc_block_info *b) { int sel; int i; unsigned char *in = b->in; int weight_bits = b->weight_bits; if (b->partitions == 1) { //single-partition mode, simple CEM b->partindex = 0; b->part[0].mode = ASTC_readbits(in, b->config_bits, 4); b->config_bits += 4; } else { //multi b->partindex = ASTC_readmanybits(in, b->config_bits, 10); b->config_bits += 10; sel = ASTC_readbits(in, b->config_bits, 6); b->config_bits += 6; if (!(sel&3)) { sel = (sel>>2)&0xf; for (i = 0; i < b->partitions; i++) b->part[i].mode = sel; //all the same } else { int shift = 2; int highbits = b->partitions*3 - 4; weight_bits += highbits; sel |= ASTC_readbits(in, 128-weight_bits, highbits)<<6; //I don't know why this is separate. it seems like an unnecessary complication to me. for (i = 0; i < b->partitions; i++, shift++) { b->part[i].mode = ((sel&3)-1)<<2; //class groups b->part[i].mode += ((sel>>shift)&1)<<2;//class } for (i = 0; i < b->partitions; i++, shift+=2) b->part[i].mode += (sel>>shift)&3; //specific mode info } } if (b->dualplane) { weight_bits += 2; b->ccs = ASTC_readbits(in, 128-weight_bits, 2); } else b->ccs = 0; b->ep_bits = 128 - weight_bits - b->config_bits; //weights are at 128-weight_bits to 128 //epdata is at config_bits to config_bits+ep_bits } #ifdef ASTC_WITH_HDRTEST ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd) { struct astc_block_info b; int i; size_t blocks = datasize/16; b.in = in; b.blocksize[0] = bw; b.blocksize[1] = bh; b.blocksize[2] = bd; while(blocks --> 0) { ASTC_ReadBlockMode(&b); if (b.status == ASTC_VOID_HDR) return 1; //if we're getting hdr blocks then we can decode properly only with hdr if (b.status == ASTC_VOID_LDR) return 0; //if we're getting ldr blocks, then its unlikely that there's any hdr blocks in there. if (b.status != ASTC_OKAY) continue; ASTC_ReadPartitions(&b); for (i = 0; i < b.partitions; i++) { switch(b.part[i].mode) { case 2: case 3: case 7: case 11: case 14: case 15: return 1; } } b.in += 16; } return 0; } #endif #ifdef ASTC_WITH_DECODE static unsigned char ASTC_readbits2(unsigned char *in, unsigned int *offset, unsigned int count) { //only reads up to 9 bits, because offset 7 with 10 bits needs to read more than two bytes unsigned char r = ASTC_readbits(in, *offset, count); *offset += count; return r; } static void ASTC_Decode(unsigned char *in, unsigned char *out, int count, unsigned int offset, int bits, int extra, unsigned char *dequant) { unsigned char block[5]; int j; //unfortunately these trits depend upon the values of the later bits in each block. //if only it were a nice simple modulo... if (extra==1) { //read it 5 samples at a time while(count > 0) { unsigned int t, c; block[0] = ASTC_readbits2(in, &offset, bits); t = ASTC_readbits2(in, &offset, 2); if (count > 1) { block[1] = ASTC_readbits2(in, &offset, bits); t |= ASTC_readbits2(in, &offset, 2)<<2; } else block[1] = 0; if (count > 2) { block[2] = ASTC_readbits2(in, &offset, bits); t |= ASTC_readbits2(in, &offset, 1)<<4; } else block[2] = 0; if (count > 3) { block[3] = ASTC_readbits2(in, &offset, bits); t |= ASTC_readbits2(in, &offset, 2)<<5; } else block[3] = 0; if (count > 4) { block[4] = ASTC_readbits2(in, &offset, bits); t |= ASTC_readbits2(in, &offset, 1)<<7; } else block[4] = 0; //okay, we read the block, now figure out the trits and pack them into the high part of the result if ((t&0x1c) == 0x1c) { c = ((t>>3)&0x1c) | (t&3); block[4] |= 2<>7)<>7)<>5)&3)<>4)&1)<>2)&2) | ((c>>2)&~(c>>3)&1))<>4)&1)<>2)&3)<>1)))< 0) { unsigned int t, c; block[0] = ASTC_readbits2(in, &offset, bits); t = ASTC_readbits2(in, &offset, 3); if (count > 1) { block[1] = ASTC_readbits2(in, &offset, bits); t |= ASTC_readbits2(in, &offset, 2)<<3; } else block[1] = 0; if (count > 2) { block[2] = ASTC_readbits2(in, &offset, bits); t |= ASTC_readbits2(in, &offset, 2)<<5; } else block[2] = 0; //okay, we read the block, now figure out the trits and pack them into the high part of the result if ((t&6)==6 && !(t&0x60)) { block[2] |= (((t&1)<<2) | (((t>>4)&~t&1)<<1) | ((t>>3)&~t&1))<>3)&3)<<3; c |= (~(t>>5)&3)<<1; c |= t&1; } else { block[2] |= ((t>>5)&3)<>3)&3)<>3)&3)< 0) //pure bits, nice and simple { unsigned char val = ASTC_readbits2(in, &offset, bits); *out++ = dequant[val]; } } //endpoints have a logical value between 0 and 255. //bit replication is used to fill in missing precision static unsigned char dequant_ep_1b[1<<1] = {0,255}; static unsigned char dequant_ep_2b[1<<2] = {0x00,0x55,0xaa,0xff}; static unsigned char dequant_ep_3b[1<<3] = {0x00,0x24,0x49,0x6d,0x92,0xb6,0xdb,0xff}; static unsigned char dequant_ep_4b[1<<4] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff}; static unsigned char dequant_ep_5b[1<<5] = { 0x00,0x08,0x10,0x18,0x21,0x29,0x31,0x39,0x42,0x4a,0x52,0x5a,0x63,0x6b,0x73,0x7b, 0x84,0x8c,0x94,0x9c,0xa5,0xad,0xb5,0xbd,0xc6,0xce,0xd6,0xde,0xe7,0xef,0xf7,0xff}; static unsigned char dequant_ep_6b[1<<6] = { 0x00,0x04,0x08,0x0c,0x10,0x14,0x18,0x1c,0x20,0x24,0x28,0x2c,0x30,0x34,0x38,0x3c, 0x41,0x45,0x49,0x4d,0x51,0x55,0x59,0x5d,0x61,0x65,0x69,0x6d,0x71,0x75,0x79,0x7d, 0x82,0x86,0x8a,0x8e,0x92,0x96,0x9a,0x9e,0xa2,0xa6,0xaa,0xae,0xb2,0xb6,0xba,0xbe, 0xc3,0xc7,0xcb,0xcf,0xd3,0xd7,0xdb,0xdf,0xe3,0xe7,0xeb,0xef,0xf3,0xf7,0xfb,0xff}; static unsigned char dequant_ep_7b[1<<7] = { 0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e, 0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e, 0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e, 0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e, 0x81,0x83,0x85,0x87,0x89,0x8b,0x8d,0x8f,0x91,0x93,0x95,0x97,0x99,0x9b,0x9d,0x9f, 0xa1,0xa3,0xa5,0xa7,0xa9,0xab,0xad,0xaf,0xb1,0xb3,0xb5,0xb7,0xb9,0xbb,0xbd,0xbf, 0xc1,0xc3,0xc5,0xc7,0xc9,0xcb,0xcd,0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,0xdf, 0xe1,0xe3,0xe5,0xe7,0xe9,0xeb,0xed,0xef,0xf1,0xf3,0xf5,0xf7,0xf9,0xfb,0xfd,0xff}; static unsigned char dequant_ep_8b[1<<8] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff}; static unsigned char dequant_ep_0t[3] = {0,128,255}; static unsigned char dequant_ep_1t[6] = {0x00,0xff,0x33,0xcc,0x66,0x99}; static unsigned char dequant_ep_2t[12] = {0x00,0xff,0x45,0xba,0x17,0xe8,0x5c,0xa3,0x2e,0xd1,0x74,0x8b}; static unsigned char dequant_ep_3t[24] = {0x00,0xff,0x21,0xde,0x42,0xbd,0x63,0x9c,0x0b,0xf4,0x2c,0xd3,0x4d,0xb2,0x6e,0x91,0x16,0xe9,0x37,0xc8,0x58,0xa7,0x79,0x86}; static unsigned char dequant_ep_4t[48] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x41,0xbe,0x51,0xae,0x61,0x9e,0x71,0x8e,0x05,0xfa,0x15,0xea,0x26,0xd9,0x36,0xc9,0x46,0xb9,0x56,0xa9,0x67,0x98,0x77,0x88,0x0b,0xf4,0x1b,0xe4,0x2b,0xd4,0x3b,0xc4,0x4c,0xb3,0x5c,0xa3,0x6c,0x93,0x7c,0x83}; static unsigned char dequant_ep_5t[96] = {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x02,0xfd,0x0a,0xf5,0x12,0xed,0x1a,0xe5,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x05,0xfa,0x0d,0xf2,0x15,0xea,0x1d,0xe2,0x25,0xda,0x2d,0xd2,0x35,0xca,0x3d,0xc2,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81}; static unsigned char dequant_ep_6t[192]= {0x00,0xff,0x04,0xfb,0x08,0xf7,0x0c,0xf3,0x10,0xef,0x14,0xeb,0x18,0xe7,0x1c,0xe3,0x20,0xdf,0x24,0xdb,0x28,0xd7,0x2c,0xd3,0x30,0xcf,0x34,0xcb,0x38,0xc7,0x3c,0xc3,0x40,0xbf,0x44,0xbb,0x48,0xb7,0x4c,0xb3,0x50,0xaf,0x54,0xab,0x58,0xa7,0x5c,0xa3,0x60,0x9f,0x64,0x9b,0x68,0x97,0x6c,0x93,0x70,0x8f,0x74,0x8b,0x78,0x87,0x7c,0x83,0x01,0xfe,0x05,0xfa,0x09,0xf6,0x0d,0xf2,0x11,0xee,0x15,0xea,0x19,0xe6,0x1d,0xe2,0x21,0xde,0x25,0xda,0x29,0xd6,0x2d,0xd2,0x31,0xce,0x35,0xca,0x39,0xc6,0x3d,0xc2,0x41,0xbe,0x45,0xba,0x49,0xb6,0x4d,0xb2,0x51,0xae,0x55,0xaa,0x59,0xa6,0x5d,0xa2,0x61,0x9e,0x65,0x9a,0x69,0x96,0x6d,0x92,0x71,0x8e,0x75,0x8a,0x79,0x86,0x7d,0x82,0x02,0xfd,0x06,0xf9,0x0a,0xf5,0x0e,0xf1,0x12,0xed,0x16,0xe9,0x1a,0xe5,0x1e,0xe1,0x22,0xdd,0x26,0xd9,0x2a,0xd5,0x2e,0xd1,0x32,0xcd,0x36,0xc9,0x3a,0xc5,0x3e,0xc1,0x42,0xbd,0x46,0xb9,0x4a,0xb5,0x4e,0xb1,0x52,0xad,0x56,0xa9,0x5a,0xa5,0x5e,0xa1,0x62,0x9d,0x66,0x99,0x6a,0x95,0x6e,0x91,0x72,0x8d,0x76,0x89,0x7a,0x85,0x7e,0x81}; static unsigned char dequant_ep_0q[5] = {0,64,128,192,255}; static unsigned char dequant_ep_1q[10] = {0x00,0xff,0x1c,0xe3,0x38,0xc7,0x54,0xab,0x71,0x8e}; static unsigned char dequant_ep_2q[20] = {0x00,0xff,0x43,0xbc,0x0d,0xf2,0x50,0xaf,0x1b,0xe4,0x5e,0xa1,0x28,0xd7,0x6b,0x94,0x36,0xc9,0x79,0x86}; static unsigned char dequant_ep_3q[40] = {0x00,0xff,0x20,0xdf,0x41,0xbe,0x61,0x9e,0x06,0xf9,0x27,0xd8,0x47,0xb8,0x68,0x97,0x0d,0xf2,0x2d,0xd2,0x4e,0xb1,0x6e,0x91,0x13,0xec,0x34,0xcb,0x54,0xab,0x75,0x8a,0x1a,0xe5,0x3a,0xc5,0x5b,0xa4,0x7b,0x84}; static unsigned char dequant_ep_4q[80] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x40,0xbf,0x50,0xaf,0x60,0x9f,0x70,0x8f,0x03,0xfc,0x13,0xec,0x23,0xdc,0x33,0xcc,0x43,0xbc,0x53,0xac,0x64,0x9b,0x74,0x8b,0x06,0xf9,0x16,0xe9,0x26,0xd9,0x36,0xc9,0x47,0xb8,0x57,0xa8,0x67,0x98,0x77,0x88,0x09,0xf6,0x19,0xe6,0x2a,0xd5,0x3a,0xc5,0x4a,0xb5,0x5a,0xa5,0x6a,0x95,0x7a,0x85,0x0d,0xf2,0x1d,0xe2,0x2d,0xd2,0x3d,0xc2,0x4d,0xb2,0x5d,0xa2,0x6d,0x92,0x7d,0x82}; static unsigned char dequant_ep_5q[160]= {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x01,0xfe,0x09,0xf6,0x11,0xee,0x19,0xe6,0x21,0xde,0x29,0xd6,0x31,0xce,0x39,0xc6,0x41,0xbe,0x49,0xb6,0x51,0xae,0x59,0xa6,0x61,0x9e,0x69,0x96,0x71,0x8e,0x79,0x86,0x03,0xfc,0x0b,0xf4,0x13,0xec,0x1b,0xe4,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x04,0xfb,0x0c,0xf3,0x14,0xeb,0x1c,0xe3,0x24,0xdb,0x2c,0xd3,0x34,0xcb,0x3c,0xc3,0x44,0xbb,0x4c,0xb3,0x54,0xab,0x5c,0xa3,0x64,0x9b,0x6c,0x93,0x74,0x8b,0x7c,0x83,0x06,0xf9,0x0e,0xf1,0x16,0xe9,0x1e,0xe1,0x26,0xd9,0x2e,0xd1,0x36,0xc9,0x3e,0xc1,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81}; static const struct { unsigned char extra, bits, *dequant; } astc_epvmode[] = { {0,1, dequant_ep_1b}, //2 {1,0, dequant_ep_0t}, //3 {0,2, dequant_ep_2b}, //4 {2,0, dequant_ep_0q}, //5 {1,1, dequant_ep_1t}, //6 {0,3, dequant_ep_3b}, //8 {2,1, dequant_ep_1q}, //10 {1,2, dequant_ep_2t}, //12 {0,4, dequant_ep_4b}, //16 {2,2, dequant_ep_2q}, //20 {1,3, dequant_ep_3t}, //24 {0,5, dequant_ep_5b}, //32 {2,3, dequant_ep_3q}, //40 {1,4, dequant_ep_4t}, //48 {0,6, dequant_ep_6b}, //64 {2,4, dequant_ep_4q}, //80 {1,5, dequant_ep_5t}, //96 {0,7, dequant_ep_7b}, //128 {2,5, dequant_ep_5q}, //160 {1,6, dequant_ep_6t}, //192 {0,8, dequant_ep_8b}, //256 //other modes don't make any sense }; /*static void ASTC_CalcDequant(void) { int i; int extra = 0; int bits = 1; int isweight = 1; int targbits = isweight?6:8; int v; static qboolean nospam; if (nospam) return; nospam = true; //binary: if (!extra) { for (bits = 1; bits <= (isweight?5:8); bits++) { Con_Printf("table: %s_%ib", isweight?"weight":"ep", bits); for (i = 0; i < (1<>bits; v|=v>>bits; v|=v>>bits; v|=v>>bits; v|=v>>bits; v|=v>>bits; v|=v>>bits; v|=v>>bits; if (isweight && v > 32) v++; //0-64 instead of 0-63 Con_Printf("0x%02x,", v); } Con_Printf("\n"); } } else if (extra == 1) { int A,B,C,D; for (bits = 1; bits <= (isweight?3:6); bits++) { Con_Printf("table: %s_%it:\n", isweight?"weight":"ep", bits); for (i = 0; i < ((2<>bits; break; case 2: A = (i&1)*(isweight?0x7f:0x1ff); B = ((i>>1)&1) * (isweight?0b1000101:0b100010110); C = isweight?25:93; D = i>>bits; break; case 3: A = (i&1)*(isweight?0x7f:0x1ff); B = ((i>>1)&1) * (isweight?0b0100001:0b010000101); //b B|= ((i>>2)&1) * (isweight?0b1000010:0b100001010); //c C = isweight?11:44; D = i>>bits; break; case 4: A = (i&1)*0x1ff; B = ((i>>1)&1) * 0b001000001; //b B|= ((i>>2)&1) * 0b010000010; //c B|= ((i>>3)&1) * 0b100000100; //d C = 22; D = i>>bits; break; case 5: A = (i&1)*0x1ff; B = ((i>>1)&1) * 0b000100000; //b B|= ((i>>2)&1) * 0b001000000; //c B|= ((i>>3)&1) * 0b010000001; //d B|= ((i>>4)&1) * 0b100000010; //e C = 11; D = i>>bits; break; case 6: A = (i&1)*0x1ff; B = ((i>>1)&1) * 0b000010000; //b B|= ((i>>2)&1) * 0b000100000; //c B|= ((i>>3)&1) * 0b001000000; //d B|= ((i>>4)&1) * 0b010000000; //e B|= ((i>>5)&1) * 0b100000001; //f C = 5; D = i>>bits; break; } v = D * C + B; v = v ^ A; v = (A & (isweight?0x20:0x80)) | (v >> 2); if (isweight && v > 32) v++; //0-64 instead of 0-63 Con_Printf("0x%02x,", v); } Con_Printf("\n"); } } else if (extra == 2) { int A,B,C,D; for (bits = 1; bits <= (isweight?2:5); bits++) { Con_Printf("table: %s_%iq:\n", isweight?"weight":"ep", bits); for (i = 0; i < ((4<>bits; break; case 2: A = (i&1)*(isweight?0x7f:0x1ff); B = ((i>>1)&1) * (isweight?0b1000010:0b100001100); C = isweight?13:54; D = i>>bits; break; case 3: A = (i&1)*0x1ff; B = ((i>>1)&1) * 0b010000010; //b B|= ((i>>2)&1) * 0b100000101; //c C = 26; D = i>>bits; break; case 4: A = (i&1)*0x1ff; B = ((i>>1)&1) * 0b001000000; //b B|= ((i>>2)&1) * 0b010000001; //c B|= ((i>>3)&1) * 0b100000010; //d C = 13; D = i>>bits; break; case 5: A = (i&1)*0x1ff; B = ((i>>1)&1) * 0b000100000; //b B|= ((i>>2)&1) * 0b001000000; //c B|= ((i>>3)&1) * 0b010000000; //d B|= ((i>>4)&1) * 0b100000001; //e C = 6; D = i>>bits; break; } v = D * C + B; v = v ^ A; v = (A & (isweight?0x20:0x80)) | (v >> 2); if (isweight && v > 32) v++; //0-64 instead of 0-63 Con_Printf("0x%02x,", v); } Con_Printf("\n"); } } }*/ static void ASTC_blue_contract(int *out, int r, int g, int b, int a) { out[0] = (r+b) >> 1; out[1] = (g+b) >> 1; out[2] = b; out[3] = a; } static int ASTC_bit_transfer_signed(int a, unsigned char *b) //returns new value for a. { *b >>= 1; *b |= a & 0x80; a >>= 1; a &= 0x3F; if((a&0x20)!=0) a=a-0x40; return a; } static void ASTC_clamp_unorm8(int *c) { c[0] = bound(0, c[0], 255); c[1] = bound(0, c[1], 255); c[2] = bound(0, c[2], 255); c[3] = bound(0, c[3], 255); } #ifdef ASTC_WITH_HDR static void ASTC_HDR_Mode_2(struct astc_part *p, unsigned char *v) { int y0,y1; if(v[1] >= v[0]) { y0 = (v[0] << 4); y1 = (v[1] << 4); } else { y0 = (v[1] << 4) + 8; y1 = (v[0] << 4) - 8; } Vector4Set(p->ep[0], y0, y0, y0, 0x780); Vector4Set(p->ep[1], y1, y1, y1, 0x780); p->hdr = 0xf; } static void ASTC_HDR_Mode_3(struct astc_part *p, unsigned char *v) { int y0, y1, d; if((v[0]&0x80) != 0) { y0 = ((v[1] & 0xE0) << 4) | ((v[0] & 0x7F) << 2); d = (v[1] & 0x1F) << 2; } else { y0 = ((v[1] & 0xF0) << 4) | ((v[0] & 0x7F) << 1); d = (v[1] & 0x0F) << 1; } y1 = y0 + d; if(y1 > 0xFFF) y1 = 0xFFF; Vector4Set(p->ep[0], y0, y0, y0, 0x780); Vector4Set(p->ep[1], y1, y1, y1, 0x780); p->hdr = 0xf; } static void ASTC_HDR_Mode_7(struct astc_part *p, unsigned char *v) { int modeval = ((v[0]&0xC0)>>6) | ((v[1]&0x80)>>5) | ((v[2]&0x80)>>4); int majcomp; int mode; static const int shamts[6] = { 1,1,2,3,4,5 }; int shamt,t; int red, green, blue, scale; int x0,x1,x2,x3,x4,x5,x6,ohm; if( (modeval & 0xC ) != 0xC ) { majcomp = modeval >> 2; mode = modeval & 3; } else if( modeval != 0xF ) { majcomp = modeval & 3; mode = 4; } else { majcomp = 0; mode = 5; } red = v[0] & 0x3f; green = v[1] & 0x1f; blue = v[2] & 0x1f; scale = v[3] & 0x1f; x0 = (v[1] >> 6) & 1; x1 = (v[1] >> 5) & 1; x2 = (v[2] >> 6) & 1; x3 = (v[2] >> 5) & 1; x4 = (v[3] >> 7) & 1; x5 = (v[3] >> 6) & 1; x6 = (v[3] >> 5) & 1; ohm = 1 << mode; if( ohm & 0x30 ) green |= x0 << 6; if( ohm & 0x3A ) green |= x1 << 5; if( ohm & 0x30 ) blue |= x2 << 6; if( ohm & 0x3A ) blue |= x3 << 5; if( ohm & 0x3D ) scale |= x6 << 5; if( ohm & 0x2D ) scale |= x5 << 6; if( ohm & 0x04 ) scale |= x4 << 7; if( ohm & 0x3B ) red |= x4 << 6; if( ohm & 0x04 ) red |= x3 << 6; if( ohm & 0x10 ) red |= x5 << 7; if( ohm & 0x0F ) red |= x2 << 7; if( ohm & 0x05 ) red |= x1 << 8; if( ohm & 0x0A ) red |= x0 << 8; if( ohm & 0x05 ) red |= x0 << 9; if( ohm & 0x02 ) red |= x6 << 9; if( ohm & 0x01 ) red |= x3 << 10; if( ohm & 0x02 ) red |= x5 << 10; shamt = shamts[mode]; red <<= shamt; green <<= shamt; blue <<= shamt; scale <<= shamt; if( mode != 5 ) { green = red - green; blue = red - blue; } if( majcomp == 1 ) { t = red; red = green; green = t; } if( majcomp == 2 ) { t = red; red = blue; blue = t; } p->ep[1][0] = bound( 0, red, 0xFFF ); p->ep[1][1] = bound( 0, green, 0xFFF ); p->ep[1][2] = bound( 0, blue, 0xFFF ); p->ep[0][0] = bound( 0, red - scale, 0xFFF ); p->ep[0][1] = bound( 0, green - scale, 0xFFF ); p->ep[0][2] = bound( 0, blue - scale, 0xFFF ); p->ep[1][3] = p->ep[0][3] = 0x780; p->hdr = 0xf; } static void ASTC_HDR_Mode_11(struct astc_part *p, unsigned char *v) { static const int dbitstab[8] = {7,6,7,6,5,6,5,6}; int shamt; int majcomp = ((v[4] & 0x80) >> 7) | ((v[5] & 0x80) >> 6); int mode,va,vb0,vb1,vc,vd0,vd1; int x0,x1,x2,x3,x4,x5,ohm; if( majcomp == 3 ) { Vector4Set(p->ep[0], v[0] << 4, v[2] << 4, (v[4] & 0x7f) << 5, 0x780); Vector4Set(p->ep[1], v[1] << 4, v[3] << 4, (v[5] & 0x7f) << 5, 0x780); p->hdr = 0xf; return; } mode = ((v[1]&0x80)>>7) | ((v[2]&0x80)>>6) | ((v[3]&0x80)>>5); va = v[0] | ((v[1] & 0x40) << 2); vb0 = v[2] & 0x3f; vb1 = v[3] & 0x3f; vc = v[1] & 0x3f; vd0 = v[4] & 0x7f; vd1 = v[5] & 0x7f; if (vd0 & (1<<(dbitstab[mode]-1))) vd0 |= -1 & ~((1u<> 6) & 1; x1 = (v[3] >> 6) & 1; x2 = (v[4] >> 6) & 1; x3 = (v[5] >> 6) & 1; x4 = (v[4] >> 5) & 1; x5 = (v[5] >> 5) & 1; ohm = 1 << mode; if( ohm & 0xA4 ) va |= x0 << 9; if( ohm & 0x08 ) va |= x2 << 9; if( ohm & 0x50 ) va |= x4 << 9; if( ohm & 0x50 ) va |= x5 << 10; if( ohm & 0xA0 ) va |= x1 << 10; if( ohm & 0xC0 ) va |= x2 << 11; if( ohm & 0x04 ) vc |= x1 << 6; if( ohm & 0xE8 ) vc |= x3 << 6; if( ohm & 0x20 ) vc |= x2 << 7; if( ohm & 0x5B ) vb0 |= x0 << 6; if( ohm & 0x5B ) vb1 |= x1 << 6; if( ohm & 0x12 ) vb0 |= x2 << 7; if( ohm & 0x12 ) vb1 |= x3 << 7; // Now shift up so that major component is at top of 12-bit value shamt = (mode >> 1) ^ 3; va <<= shamt; vb0 <<= shamt; vb1 <<= shamt; vc <<= shamt; vd0 <<= shamt; vd1 <<= shamt; p->ep[1][0] = bound( 0, va, 0xFFF ); p->ep[1][1] = bound( 0, va - vb0, 0xFFF ); p->ep[1][2] = bound( 0, va - vb1, 0xFFF ); p->ep[0][0] = bound( 0, va - vc, 0xFFF ); p->ep[0][1] = bound( 0, va - vb0 - vc - vd0, 0xFFF ); p->ep[0][2] = bound( 0, va - vb1 - vc - vd1, 0xFFF ); if( majcomp == 1 ) { p->ep[0][3] = p->ep[0][0]; p->ep[0][0] = p->ep[0][1]; p->ep[0][1] = p->ep[0][3]; p->ep[1][3] = p->ep[1][0]; p->ep[1][0] = p->ep[1][1]; p->ep[1][1] = p->ep[1][3]; } else if( majcomp == 2 ) { p->ep[0][3] = p->ep[0][0]; p->ep[0][0] = p->ep[0][2]; p->ep[0][2] = p->ep[0][3]; p->ep[1][3] = p->ep[1][0]; p->ep[1][0] = p->ep[1][2]; p->ep[1][2] = p->ep[1][3]; } p->ep[0][3] = p->ep[1][3] = 0x780; p->hdr = 0xf; } static void ASTC_HDR_Mode_14(struct astc_part *p, unsigned char *v) { ASTC_HDR_Mode_11(p, v); p->ep[0][3] = v[6]; p->ep[1][3] = v[7]; p->hdr &= 0x7; } static void ASTC_HDR_Mode_15(struct astc_part *p, unsigned char *v) { int v6=v[6], v7=v[7]; int mode; ASTC_HDR_Mode_11(p,v); mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2); v6 &= 0x7F; v7 &= 0x7F; if(mode==3) { p->ep[0][3] = v6 << 5; p->ep[1][3] = v7 << 5; } else { v6 |= (v7 << (mode+1)) & 0x780; v7 &= (0x3F >> mode); v7 ^= 0x20 >> mode; v7 -= 0x20 >> mode; v6 <<= (4-mode); v7 <<= (4-mode); v7 += v6; v7 = bound(0, v7, 0xFFF); p->ep[0][3] = v6; p->ep[1][3] = v7; } } #endif static void ASTC_DecodeEndpoints(struct astc_block_info *b, unsigned char *v) { int i, t0, t1, t3, t5, t7; for (i = 0; i < b->partitions; i++) { #ifdef ASTC_WITH_HDR b->part[i].hdr = 0; #endif switch (b->part[i].mode & 15) { #ifdef ASTC_WITH_HDR case 2: //HDR Luminance, large range ASTC_HDR_Mode_2(&b->part[i], v); break; case 3: //HDR Luminance, small range ASTC_HDR_Mode_3(&b->part[i], v); break; case 7: //HDR RGB, base+scale ASTC_HDR_Mode_7(&b->part[i], v); break; case 11: //HDR RGB ASTC_HDR_Mode_11(&b->part[i], v); break; case 14: //HDR RGB + LDR Alpha ASTC_HDR_Mode_14(&b->part[i], v); break; case 15: //HDR RGB + HDR Alpha ASTC_HDR_Mode_15(&b->part[i], v); break; #endif default: //the error colour - for unsupported hdr endpoints. unreachable when hdr is enabled. just fill it with the error colour. Vector4Set(b->part[i].ep[0], 0xff, 0, 0xff, 0xff); Vector4Set(b->part[i].ep[1], 0xff, 0, 0xff, 0xff); break; case 0: //LDR Luminance, direct Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], 0xff); Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], 0xff); break; case 1: //LDR Luminance, base+offset t0 = (v[0]>>2)|(v[1]&0xc0); t1 = t0+(v[1]&0x3f); if (t1>0xff) t1=0xff; Vector4Set(b->part[i].ep[0], t0, t0, t0, 0xff); Vector4Set(b->part[i].ep[1], t1, t1, t1, 0xff); break; case 4: //LDR Luminance+Alpha,direct Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], v[2]); Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], v[3]); break; case 5: //LDR Luminance+Alpha, base+offset t1 = ASTC_bit_transfer_signed(v[1],&v[0]); t3 = ASTC_bit_transfer_signed(v[3],&v[2]); Vector4Set(b->part[i].ep[0],v[0],v[0],v[0],v[2]); Vector4Set(b->part[i].ep[1],v[0]+t1,v[0]+t1,v[0]+t1,v[2]+t3); ASTC_clamp_unorm8(b->part[i].ep[0]); ASTC_clamp_unorm8(b->part[i].ep[1]); break; case 6: //LDR RGB, base+scale Vector4Set(b->part[i].ep[0], ((int)v[0]*(int)v[3])>>8, ((int)v[1]*(int)v[3])>>8, ((int)v[2]*(int)v[3])>>8, 0xff); Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], 0xff); break; case 8: //LDR RGB, Direct t0 = (int)v[0]+(int)v[2]+(int)v[4]; t1 = (int)v[1]+(int)v[3]+(int)v[5]; if (t1>=t0) { Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],0xff); Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],0xff); } else { ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5], 0xff); ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff); } break; case 9: //LDR RGB, base+offset t1 = ASTC_bit_transfer_signed(v[1],&v[0]); t3 = ASTC_bit_transfer_signed(v[3],&v[2]); t5 = ASTC_bit_transfer_signed(v[5],&v[4]); if(t1+t3+t5 >= 0) { Vector4Set(b->part[i].ep[0],v[0],v[2],v[4],0xff); Vector4Set(b->part[i].ep[1],v[0]+t1,v[2]+t3,v[4]+t5,0xff); } else { ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5, 0xff); ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff); } ASTC_clamp_unorm8(b->part[i].ep[0]); ASTC_clamp_unorm8(b->part[i].ep[1]); break; case 10: //LDR RGB, base+scale plus two A Vector4Set(b->part[i].ep[0], ((int)v[0]*v[3])>>8, ((int)v[1]*v[3])>>8, ((int)v[2]*v[3])>>8, v[4]); Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], v[5]); break; case 12: //LDR RGBA, direct if (v[1]+(int)v[3]+v[5]>=v[0]+(int)v[2]+v[4]) { Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]); Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],v[7]); } else { ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5],v[7]); ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]); } break; case 13: //LDR RGBA, base+offset t1 = ASTC_bit_transfer_signed(v[1],&v[0]); t3 = ASTC_bit_transfer_signed(v[3],&v[2]); t5 = ASTC_bit_transfer_signed(v[5],&v[4]); t7 = ASTC_bit_transfer_signed(v[7],&v[6]); if(t1+t3+t5>=0) { Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]); Vector4Set(b->part[i].ep[1], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7); } else { ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7); ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]); } ASTC_clamp_unorm8(b->part[i].ep[0]); ASTC_clamp_unorm8(b->part[i].ep[1]); break; } v += ((b->part[i].mode>>2)+1)<<1; } } static void ASTC_ReadEndpoints(struct astc_block_info *b) { int i; int cembits; unsigned char epv[18]; //maximum raw endpoint values, char epvalues; unsigned char gahffs[16], t; //figure out how many raw values we need epvalues = 0; for (i = 0; i < b->partitions; i++) epvalues += ((b->part[i].mode>>2)+1)<<1; if (epvalues > countof(epv)) { b->status = ASTC_ERROR; return; } //the endpoint bits are encoded using the largest size available that'll still fit, yielding raw values between 0-255. for(i = countof(astc_epvmode)-1; i >= 0; i--) { cembits = ASTC_DecodeSize(epvalues, astc_epvmode[i].bits, astc_epvmode[i].extra); if(cembits <= b->ep_bits) { //read the values. ASTC_Decode(b->in, epv, epvalues, b->config_bits, astc_epvmode[i].bits, astc_epvmode[i].extra, astc_epvmode[i].dequant); //and decode them. ASTC_DecodeEndpoints(b, epv); //weight bits are backwards (gah! ffs!) //so swap them around so our decode function doesn't need to care for (i = 0; i < countof(gahffs); i++) { t = b->in[i]; t = (t>>4)|(t<<4); t = ((t&0xcc)>>2)|((t&0x33)<<2); t = ((t&0xaa)>>1)|((t&0x55)<<1); gahffs[15-i] = t; } //weights are aligned at the end... now the start. gah! ffs! ASTC_Decode(gahffs, b->weights, b->wcount[3], 0, astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra, astc_weightmode[b->precision].dequant); return; } } b->status = ASTC_ERROR; } static unsigned int hash52(unsigned int p) { p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; p ^= p << 6; p ^= p >> 17; return p; } static int ASTC_ChoosePartition(int seed, int x, int y, int z, int partitions, int smallblock) { int sh1, sh2, sh3, a,b,c,d; unsigned int rnum; unsigned char seed1,seed2,seed3,seed4,seed5,seed6,seed7,seed8,seed9,seed10,seed11,seed12; if (partitions==1) return 0; if (smallblock) { x <<= 1; y <<= 1; z <<= 1; } seed += (partitions-1) * 1024; rnum = hash52(seed); seed1 = rnum & 0xF; seed2 = (rnum >> 4) & 0xF; seed3 = (rnum >> 8) & 0xF; seed4 = (rnum >> 12) & 0xF; seed5 = (rnum >> 16) & 0xF; seed6 = (rnum >> 20) & 0xF; seed7 = (rnum >> 24) & 0xF; seed8 = (rnum >> 28) & 0xF; seed9 = (rnum >> 18) & 0xF; seed10 = (rnum >> 22) & 0xF; seed11 = (rnum >> 26) & 0xF; seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; seed1 *= seed1; seed2 *= seed2; seed3 *= seed3; seed4 *= seed4; seed5 *= seed5; seed6 *= seed6; seed7 *= seed7; seed8 *= seed8; seed9 *= seed9; seed10 *= seed10; seed11 *= seed11; seed12 *= seed12; if (seed & 1) { sh1 = ((seed&2) ? 4:5); sh2 = ((partitions==3) ? 6:5); } else { sh1 = ((partitions==3) ? 6:5); sh2 = ((seed&2) ? 4:5); } sh3 = (seed & 0x10) ? sh1 : sh2; seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2; seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2; seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3; a = seed1*x + seed2*y + seed11*z + (rnum >> 14); b = seed3*x + seed4*y + seed12*z + (rnum >> 10); c = seed5*x + seed6*y + seed9 *z + (rnum >> 6); d = seed7*x + seed8*y + seed10*z + (rnum >> 2); a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; if (partitions < 4) d = 0; if (partitions < 3) c = 0; if (a >= b && a >= c && a >= d) return 0; else if (b >= c && b >= d) return 1; else if (c >= d) return 2; else return 3; } #endif #ifdef ASTC_WITH_LDR //Spits out 8-bit RGBA data for a single block. Any HDR blocks will result in the error colour. //sRGB can be applied by the caller, if needed. ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride, int layerstride, int bw, int bh, int bd) { struct astc_block_info b; int x, y; int stride = pixstride*4; #ifdef ASTC_WITH_3D int z; layerstride = layerstride*4-(stride*bh); #else if (bd != 1) return; //error! #endif b.in = in; b.blocksize[0] = bw; b.blocksize[1] = bh; b.blocksize[2] = bd; ASTC_ReadBlockMode(&b); if (b.status == ASTC_VOID_LDR) { //void extent //Note: we don't validate the extents. for (y = 0; y < bh; y++, out += stride) for (x = 0; x < bw; x++) { out[(x<<2)+0] = in[9]; out[(x<<2)+1] = in[11]; out[(x<<2)+2] = in[13]; out[(x<<2)+3] = in[15]; } return; } if (b.status == ASTC_OKAY) ASTC_ReadPartitions(&b); if (b.status == ASTC_OKAY) ASTC_ReadEndpoints(&b); if (b.status == ASTC_OKAY) { #define N b.wcount[0] #define M b.wcount[1] int s1=1<>6; fr=r&0xf; #endif for (y = 0; y < bh; y++, out += stride) { t = ((dt*y)*(b.wcount[1]-1)+32)>>6; ft=t&0xf; for (x = 0; x < bw; x++) { p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)]; s = ((ds*x)*(b.wcount[0]-1)+32)>>6; fs=s&0xf; #ifdef ASTC_WITH_3D if (bd != 1) { //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us. if (fs>fr) { //figure out which weights/factors to use. if (ft>fr) { if (fs>ft) s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr; else s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr; } else s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft; } else { if (fs>ft) s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft; else { if (ft>fr) s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs; else s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs; } } s1 <<= b.dualplane; s2 <<= b.dualplane; s2+=s1; //s3 = (N*M+N+1)<>4)+(t>>4)*N+(r>>4)*N*M) << b.dualplane; } else #endif { //s1 = 1<> 4; w10 = ft - w11; w01 = fs - w11; w00 = 16 - fs - ft + w11; v0 = ((s>>4)+(t>>4)*N) << b.dualplane; } w = ( w00*b.weights[v0] + w01*b.weights[v0+s1] + w10*b.weights[v0+s2] + w11*b.weights[v0+s3] + 8) >> 4; out[(x<<2)+0] = ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6; out[(x<<2)+1] = ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6; out[(x<<2)+2] = ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6; out[(x<<2)+3] = ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6; if (b.dualplane) { //dual planes has a second set of weights that override a single channel v0++; w = ( w00*b.weights[v0] + w01*b.weights[v0+s1] + w10*b.weights[v0+s2] + w11*b.weights[v0+s3] + 8) >> 4; out[(x<<2)+b.ccs] = ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6; } } } } } else { //error colour == magenta #ifdef ASTC_WITH_3D for (z = 0; z < bd; z++, out += layerstride) #endif for (y = 0; y < bh; y++, out += stride) for (x = 0; x < bw; x++) { out[(x<<2)+0] = 0xff; out[(x<<2)+1] = 0; out[(x<<2)+2] = 0xff; out[(x<<2)+3] = 0xff; } } } #endif #ifdef ASTC_WITH_HDR static unsigned short ASTC_GenHalffloat(int hdr, int rawval) { if (hdr) { int fp16, m; fp16 = (rawval&0xF800) >> 1; m = rawval&0x7FF; if (m < 512) fp16 |= (3*m)>>3; else if (m >= 1536) fp16 |= (5*m - 2048)>>3; else fp16 |= (4*m - 512)>>3; return fp16; } else { union { float f; unsigned int u; } u = {rawval/65535.0}; int e = 0; int m; e = ((u.u>>23)&0xff) - 127; if (e < -15) return 0; //too small exponent, treat it as a 0 denormal if (e > 15) m = 0; //infinity instead of a nan else m = (u.u&((1<<23)-1))>>13; return ((e+15)<<10) | m; } } //Spits out half-float RGBA data for a single block. ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride, int layerstride, int bw, int bh, int bd) { int x, y; int stride = pixstride*4; struct astc_block_info b; #ifdef ASTC_WITH_3D int z; layerstride = layerstride*4-(stride*bh); #else if (bd != 1) return; //error! #endif b.in = in; b.blocksize[0] = bw; b.blocksize[1] = bh; b.blocksize[2] = bd; ASTC_ReadBlockMode(&b); if (b.status == ASTC_VOID_HDR) { //void extent //Note: we don't validate the extents. for (y = 0; y < bh; y++, out += stride) for (x = 0; x < bw; x++) { //hdr void extents already use fp16 out[(x<<2)+0] = in[8] | (in[9]<<8); out[(x<<2)+1] = in[10] | (in[11]<<8); out[(x<<2)+2] = in[12] | (in[13]<<8); out[(x<<2)+3] = in[14] | (in[15]<<8); } return; } if (b.status == ASTC_VOID_LDR) { //void extent //Note: we don't validate the extents. for (y = 0; y < bh; y++, out += stride) for (x = 0; x < bw; x++) { out[(x<<2)+0] = ASTC_GenHalffloat(0, in[8] | (in[9]<<8)); out[(x<<2)+1] = ASTC_GenHalffloat(0, in[10] | (in[11]<<8)); out[(x<<2)+2] = ASTC_GenHalffloat(0, in[12] | (in[13]<<8)); out[(x<<2)+3] = ASTC_GenHalffloat(0, in[14] | (in[15]<<8)); } return; } if (b.status == ASTC_OKAY) ASTC_ReadPartitions(&b); if (b.status == ASTC_OKAY) ASTC_ReadEndpoints(&b); if (b.status == ASTC_OKAY) { #define N b.wcount[0] #define M b.wcount[1] int s1=1<>6; fr=s&0xf; #endif for (y = 0; y < bh; y++, out += stride) { t = ((dt*y)*(b.wcount[1]-1)+32)>>6; ft=s&0xf; for (x = 0; x < bw; x++) { p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)]; s = ((ds*x)*(b.wcount[0]-1)+32)>>6; fs=s&0xf; #ifdef ASTC_WITH_3D if (bd != 1) { //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us. if (fs>fr) { //figure out which weights/factors to use. if (ft>fr) { if (fs>ft) s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr; else s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr; } else s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft; } else { if (fs>ft) s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft; else { if (ft>fr) s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs; else s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs; } } s1 <<= b.dualplane; s2 <<= b.dualplane; s2+=s1; //s3 = (N*M+N+1)<>4))+((t>>4)*N)+(r>>4)*N*M) << b.dualplane; } else #endif { //s1 = 1<> 4; w10 = ft - w11; w01 = fs - w11; w00 = 16 - fs - ft + w11; v0 = (((s>>4))+(t>>4)*N) << b.dualplane; } w = ( w00*b.weights[v0] + w01*b.weights[v0+s1] + w10*b.weights[v0+s2] + w11*b.weights[v0+s3] + 8) >> 4; out[(x<<2)+0] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6); out[(x<<2)+1] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6); out[(x<<2)+2] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6); out[(x<<2)+3] = ASTC_GenHalffloat(p->hdr&8, ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6); if (b.dualplane) { //dual planes has a second set of weights that override a single channel v0++; w = ( w00*b.weights[v0] + w01*b.weights[v0+s1] + w10*b.weights[v0+s2] + w11*b.weights[v0+s3] + 8) >> 4; out[(x<<2)+b.ccs] = ASTC_GenHalffloat(p->hdr&(1<ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6); } } } } } else { //error colour == magenta #ifdef ASTC_WITH_3D for (z = 0; z < bd; z++, out += layerstride) #endif for (y = 0; y < bh; y++, out += stride) for (x = 0; x < bw; x++) { out[(x<<2)+0] = 0xf<<10; out[(x<<2)+1] = 0; out[(x<<2)+2] = 0xf<<10; out[(x<<2)+3] = 0xf<<10; } } } #endif