mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Better 1.5 bit quantization (#5971)
* Trying blocvks of 16 for IQ1_S - seems slightly better * iq1s_blocks16: Adjust scale fudge factor to 1.125 * iq1s_blocks16: going to blocks of 32 with 2048 lattice points, so same bpw. This is even better than blocks of 16. Should I try blocks of 64? But to keep the same bpw, when I go to 4096 lattice points, I need to remove blocks alltogether and just have superblocks of 256 weights. * iq1s_blocks16: Use 2*<x^2> as sigma2 in weight adjustment * iq1s_blocks16: scalar and AVX2 dot products * iq1s_blocks16: CUDA dot product * iq1s_blocks16: Metal works, Neon does not Metal works but TG is dog slow (35 t/s). PP is OKish (493 t/s). Not seeing the bug in the Neon implementation for now. * iq1s_blocks16: fixed Neon * iq1s_blocks16: very slightly faster TG on Metal Still pathetic at 37 t/s * iq1s_blocks16: speedup Metal by packing codebook into uint32_t's * Formatting * iq1s_blocks16: uint32_t codebook is also better in CUDA TG-128 is now 204 t/s up from 194 t/s. PP-512 is 5890 t/s, so significantly better than other quants * iq1s_blocks16: slightly faster Neon dot product * iq1s_blocks16: faster AVX2 dot product * iq1s_blocks16: adjust to ggml-common.h --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
		
							
								
								
									
										903
									
								
								ggml-common.h
									
									
									
									
									
								
							
							
						
						
									
										903
									
								
								ggml-common.h
									
									
									
									
									
								
							| @@ -644,136 +644,781 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512) | |||||||
|     0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101, |     0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101, | ||||||
| GGML_TABLE_END() | GGML_TABLE_END() | ||||||
|  |  | ||||||
| #define NGRID_IQ2XXS 512 | #define NGRID_IQ1S 2048 | ||||||
| GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ2XXS) | #if defined(GGML_COMMON_IMPL_C) | ||||||
|     0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000, | GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ1S) | ||||||
|     0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01, |     0xffffffffffffffff, 0xffffffffffffff01, 0xffffffffffff0000, 0xffffffffffff01ff, | ||||||
|     0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100, |     0xffffffffffff0101, 0xffffffffff00ff00, 0xffffffffff000000, 0xffffffffff01ffff, | ||||||
|     0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00, |     0xffffffffff01ff01, 0xffffffffff0101ff, 0xffffffffff010101, 0xffffffff00ff0000, | ||||||
|     0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101, |     0xffffffff0000ff00, 0xffffffff000000ff, 0xffffffff00000001, 0xffffffff00010000, | ||||||
|     0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100, |     0xffffffff01ffffff, 0xffffffff01ffff01, 0xffffffff01ff01ff, 0xffffffff01ff0101, | ||||||
|     0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00, |     0xffffffff01000000, 0xffffffff0101ffff, 0xffffffff0101ff01, 0xffffffff010101ff, | ||||||
|     0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff, |     0xffffffff01010101, 0xffffff00ffff00ff, 0xffffff00ffff0000, 0xffffff00ff00ff00, | ||||||
|     0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000, |     0xffffff00ff0000ff, 0xffffff00ff000001, 0xffffff00ff000100, 0xffffff00ff000101, | ||||||
|     0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000, |     0xffffff00ff010000, 0xffffff0000ffff00, 0xffffff0000ff0001, 0xffffff0000ff0100, | ||||||
|     0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001, |     0xffffff000000ff01, 0xffffff0000000000, 0xffffff0000000101, 0xffffff000001ff00, | ||||||
|     0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff, |     0xffffff00000100ff, 0xffffff0000010001, 0xffffff00000101ff, 0xffffff0001ff0000, | ||||||
|     0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01, |     0xffffff000100ff00, 0xffffff00010000ff, 0xffffff0001000001, 0xffffff0001010000, | ||||||
|     0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001, |     0xffffff01ffffffff, 0xffffff01ffffff01, 0xffffff01ffff01ff, 0xffffff01ffff0101, | ||||||
|     0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00, |     0xffffff01ff000000, 0xffffff01ff01ffff, 0xffffff01ff01ff01, 0xffffff01ff0101ff, | ||||||
|     0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001, |     0xffffff01ff010101, 0xffffff0100ff0000, 0xffffff010000ff00, 0xffffff0100000100, | ||||||
|     0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100, |     0xffffff01000100ff, 0xffffff0100010100, 0xffffff0101ffffff, 0xffffff0101ffff01, | ||||||
|     0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000, |     0xffffff0101ff01ff, 0xffffff0101ff0101, 0xffffff010100ff00, 0xffffff0101000000, | ||||||
|     0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000, |     0xffffff0101000100, 0xffffff010101ffff, 0xffffff010101ff01, 0xffffff01010101ff, | ||||||
|     0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000, |     0xffffff0101010101, 0xffff00ffff00ff00, 0xffff00ffff0000ff, 0xffff00ffff000001, | ||||||
|     0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff, |     0xffff00ffff010000, 0xffff00ff00ffff00, 0xffff00ff00ff0100, 0xffff00ff00000000, | ||||||
|     0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff, |     0xffff00ff00000101, 0xffff00ff000100ff, 0xffff00ff00010000, 0xffff00ff0100ff00, | ||||||
|     0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01, |     0xffff00ff01000100, 0xffff00ff01010000, 0xffff0000ffffff00, 0xffff0000ffff00ff, | ||||||
|     0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100, |     0xffff0000ffff0000, 0xffff0000ffff0001, 0xffff0000ff000000, 0xffff0000ff0001ff, | ||||||
|     0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff, |     0xffff0000ff000101, 0xffff0000ff010100, 0xffff000000ffffff, 0xffff000000ff0000, | ||||||
|     0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000, |     0xffff000000ff0101, 0xffff00000000ffff, 0xffff00000000ff00, 0xffff0000000000ff, | ||||||
|     0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101, |     0xffff000000000000, 0xffff000000000001, 0xffff000000000100, 0xffff00000001ffff, | ||||||
|     0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff, |     0xffff00000001ff01, 0xffff000000010000, 0xffff0000000101ff, 0xffff000000010101, | ||||||
|     0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff, |     0xffff000001ffff00, 0xffff00000100ff00, 0xffff000001000000, 0xffff0000010001ff, | ||||||
|     0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001, |     0xffff000001000101, 0xffff00000101ff00, 0xffff0000010100ff, 0xffff000001010000, | ||||||
|     0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01, |     0xffff000001010001, 0xffff000001010100, 0xffff0001ff0000ff, 0xffff0001ff000100, | ||||||
|     0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101, |     0xffff000100ffff00, 0xffff000100ff00ff, 0xffff00010000ffff, 0xffff00010000ff01, | ||||||
|     0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100, |     0xffff000100000000, 0xffff0001000001ff, 0xffff00010001ffff, 0xffff00010001ff00, | ||||||
|     0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00, |     0xffff000100010001, 0xffff000100010100, 0xffff000101ff0000, 0xffff00010100ff00, | ||||||
|     0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001, |     0xffff0001010000ff, 0xffff000101000100, 0xffff01ffffffffff, 0xffff01ffffffff01, | ||||||
|     0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff, |     0xffff01ffffff01ff, 0xffff01ffffff0101, 0xffff01ffff000000, 0xffff01ffff01ffff, | ||||||
|     0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000, |     0xffff01ffff01ff01, 0xffff01ffff0101ff, 0xffff01ffff010101, 0xffff01ff00ff0000, | ||||||
|     0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000, |     0xffff01ff0000ff00, 0xffff01ff00000001, 0xffff01ff00010000, 0xffff01ff01ffffff, | ||||||
|     0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100, |     0xffff01ff01ffff01, 0xffff01ff01ff01ff, 0xffff01ff01ff0101, 0xffff01ff01000000, | ||||||
|     0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100, |     0xffff01ff0101ffff, 0xffff01ff0101ff01, 0xffff01ff010101ff, 0xffff01ff01010101, | ||||||
|     0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01, |     0xffff0100ffff0000, 0xffff0100ff00ff00, 0xffff0100ff0000ff, 0xffff0100ff000100, | ||||||
|     0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff, |     0xffff0100ff0100ff, 0xffff0100ff010000, 0xffff010000ffff00, 0xffff01000000ffff, | ||||||
|     0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101, |     0xffff01000000ff00, 0xffff010000000000, 0xffff01000001ff00, 0xffff0100000100ff, | ||||||
|     0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000, |     0xffff010000010100, 0xffff01000100ff00, 0xffff0100010000ff, 0xffff010001000001, | ||||||
|     0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff, |     0xffff010001000100, 0xffff010001010000, 0xffff0101ffffffff, 0xffff0101ffffff01, | ||||||
|     0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000, |     0xffff0101ffff01ff, 0xffff0101ffff0101, 0xffff0101ff000000, 0xffff0101ff01ffff, | ||||||
|     0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff, |     0xffff0101ff01ff01, 0xffff0101ff0101ff, 0xffff0101ff010101, 0xffff010100ff0000, | ||||||
|     0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00, |     0xffff01010000ff00, 0xffff010100000100, 0xffff01010001ff00, 0xffff010100010000, | ||||||
|     0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101, |     0xffff010101ffffff, 0xffff010101ffff01, 0xffff010101ff0000, 0xffff010101ff01ff, | ||||||
|     0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000, |     0xffff010101ff0101, 0xffff010101000000, 0xffff01010101ffff, 0xffff01010101ff01, | ||||||
|     0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000, |     0xffff0101010101ff, 0xffff010101010101, 0xff00ffffff00ffff, 0xff00ffffff00ff00, | ||||||
|     0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000, |     0xff00ffffff0000ff, 0xff00ffffff000100, 0xff00ffffff0100ff, 0xff00ffffff010000, | ||||||
|     0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100, |     0xff00ffff00ffff00, 0xff00ffff00ff00ff, 0xff00ffff0000ffff, 0xff00ffff00000000, | ||||||
|     0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000, |     0xff00ffff000001ff, 0xff00ffff0001ff00, 0xff00ffff000100ff, 0xff00ffff00010000, | ||||||
|     0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001, |     0xff00ffff00010100, 0xff00ffff0100ff00, 0xff00ffff010000ff, 0xff00ffff01000001, | ||||||
|     0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff, |     0xff00ffff0101ff00, 0xff00ffff01010000, 0xff00ff00ffffff00, 0xff00ff00ffff00ff, | ||||||
|     0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000, |     0xff00ff00ffff0001, 0xff00ff00ffff0100, 0xff00ff00ff00ffff, 0xff00ff00ff00ff01, | ||||||
|     0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000, |     0xff00ff00ff000000, 0xff00ff00ff0001ff, 0xff00ff00ff01ff00, 0xff00ff00ff0100ff, | ||||||
|     0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000, |     0xff00ff00ff010100, 0xff00ff0000ff0000, 0xff00ff0000ff0101, 0xff00ff000000ffff, | ||||||
|     0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000, |     0xff00ff000000ff00, 0xff00ff000000ff01, 0xff00ff00000000ff, 0xff00ff0000000000, | ||||||
|     0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff, |     0xff00ff0000000001, 0xff00ff0000000100, 0xff00ff000001ffff, 0xff00ff0000010000, | ||||||
|     0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000, |     0xff00ff0001ff00ff, 0xff00ff000100ff01, 0xff00ff0001000000, 0xff00ff000101ff00, | ||||||
|     0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001, |     0xff00ff00010100ff, 0xff00ff01ff00ff00, 0xff00ff01ff0000ff, 0xff00ff01ff000001, | ||||||
|  |     0xff00ff01ff010000, 0xff00ff0100ffffff, 0xff00ff0100ff0001, 0xff00ff0100ff0100, | ||||||
|  |     0xff00ff010000ff01, 0xff00ff0100000000, 0xff00ff01000001ff, 0xff00ff0100000101, | ||||||
|  |     0xff00ff01000100ff, 0xff00ff0100010001, 0xff00ff0101ff0000, 0xff00ff010100ff00, | ||||||
|  |     0xff00ff01010000ff, 0xff00ff0101000001, 0xff00ff0101010000, 0xff0000ffffffff00, | ||||||
|  |     0xff0000ffffff0001, 0xff0000ffffff0100, 0xff0000ffff0000ff, 0xff0000ffff000000, | ||||||
|  |     0xff0000ffff0001ff, 0xff0000ffff000100, 0xff0000ffff01ff00, 0xff0000ffff010001, | ||||||
|  |     0xff0000ff00ffff00, 0xff0000ff00ff0000, 0xff0000ff00ff0001, 0xff0000ff00ff01ff, | ||||||
|  |     0xff0000ff00ff0101, 0xff0000ff0000ff00, 0xff0000ff000000ff, 0xff0000ff00000000, | ||||||
|  |     0xff0000ff00000001, 0xff0000ff00000100, 0xff0000ff0001ff01, 0xff0000ff00010000, | ||||||
|  |     0xff0000ff000101ff, 0xff0000ff01ff00ff, 0xff0000ff01ff0100, 0xff0000ff0100ffff, | ||||||
|  |     0xff0000ff010000ff, 0xff0000ff01000000, 0xff0000ff010001ff, 0xff0000ff01000100, | ||||||
|  |     0xff0000ff01000101, 0xff0000ff0101ff00, 0xff0000ff010100ff, 0xff0000ff01010000, | ||||||
|  |     0xff0000ff01010100, 0xff000000ffffff01, 0xff000000ffff0000, 0xff000000ffff0101, | ||||||
|  |     0xff000000ff00ff00, 0xff000000ff0000ff, 0xff000000ff000000, 0xff000000ff000001, | ||||||
|  |     0xff000000ff000100, 0xff000000ff01ffff, 0xff000000ff01ff01, 0xff000000ff010000, | ||||||
|  |     0xff000000ff0101ff, 0xff000000ff010101, 0xff00000000ffff00, 0xff00000000ff00ff, | ||||||
|  |     0xff00000000ff0000, 0xff00000000ff0001, 0xff0000000000ff00, 0xff0000000000ff01, | ||||||
|  |     0xff000000000000ff, 0xff00000000000000, 0xff00000000000001, 0xff00000000000100, | ||||||
|  |     0xff00000000000101, 0xff0000000001ff00, 0xff000000000100ff, 0xff00000000010000, | ||||||
|  |     0xff00000000010001, 0xff00000000010100, 0xff00000001ffffff, 0xff00000001ffff01, | ||||||
|  |     0xff00000001ff00ff, 0xff00000001ff0000, 0xff00000001ff01ff, 0xff00000001ff0101, | ||||||
|  |     0xff0000000100ffff, 0xff0000000100ff00, 0xff000000010000ff, 0xff00000001000000, | ||||||
|  |     0xff00000001000001, 0xff00000001000100, 0xff00000001000101, 0xff0000000101ffff, | ||||||
|  |     0xff0000000101ff01, 0xff00000001010000, 0xff000001ffffff00, 0xff000001ffff00ff, | ||||||
|  |     0xff000001ffff0000, 0xff000001ffff0001, 0xff000001ff000000, 0xff000001ff000001, | ||||||
|  |     0xff000001ff0001ff, 0xff000001ff000101, 0xff000001ff01ff00, 0xff000001ff010001, | ||||||
|  |     0xff00000100ffffff, 0xff00000100ffff01, 0xff00000100ff00ff, 0xff00000100ff0000, | ||||||
|  |     0xff00000100ff01ff, 0xff00000100ff0101, 0xff0000010000ff00, 0xff00000100000000, | ||||||
|  |     0xff00000100000001, 0xff000001000001ff, 0xff00000100000100, 0xff0000010001ff00, | ||||||
|  |     0xff000001000100ff, 0xff00000100010000, 0xff000001000101ff, 0xff00000100010100, | ||||||
|  |     0xff00000100010101, 0xff00000101ff0001, 0xff00000101ff0101, 0xff0000010100ff01, | ||||||
|  |     0xff00000101000000, 0xff000001010100ff, 0xff00000101010100, 0xff0001ffff00ff00, | ||||||
|  |     0xff0001ffff000001, 0xff0001ffff010000, 0xff0001ff00ffff00, 0xff0001ff00ff00ff, | ||||||
|  |     0xff0001ff00ff0001, 0xff0001ff00ff0100, 0xff0001ff0000ffff, 0xff0001ff00000000, | ||||||
|  |     0xff0001ff000001ff, 0xff0001ff00000101, 0xff0001ff0001ffff, 0xff0001ff0001ff00, | ||||||
|  |     0xff0001ff000100ff, 0xff0001ff00010001, 0xff0001ff00010100, 0xff0001ff01ff0000, | ||||||
|  |     0xff0001ff0100ff00, 0xff0001ff010000ff, 0xff0001ff01010000, 0xff000100ff00ffff, | ||||||
|  |     0xff000100ff00ff01, 0xff000100ff000000, 0xff000100ff000101, 0xff000100ff01ff00, | ||||||
|  |     0xff000100ff010000, 0xff00010000ffff01, 0xff00010000ff00ff, 0xff00010000ff0000, | ||||||
|  |     0xff00010000ff01ff, 0xff0001000000ff00, 0xff000100000000ff, 0xff00010000000000, | ||||||
|  |     0xff00010000000001, 0xff00010000000100, 0xff00010000000101, 0xff0001000001ffff, | ||||||
|  |     0xff00010000010000, 0xff00010000010101, 0xff00010001ff0100, 0xff0001000100ff00, | ||||||
|  |     0xff0001000100ff01, 0xff00010001000000, 0xff000100010001ff, 0xff0001000101ff00, | ||||||
|  |     0xff00010001010001, 0xff00010001010100, 0xff000101ffff0100, 0xff000101ff000001, | ||||||
|  |     0xff000101ff0100ff, 0xff000101ff010001, 0xff00010100ff00ff, 0xff00010100ff0001, | ||||||
|  |     0xff00010100ff0100, 0xff0001010000ffff, 0xff0001010000ff01, 0xff00010100000000, | ||||||
|  |     0xff000101000001ff, 0xff0001010001ff00, 0xff00010100010001, 0xff00010100010100, | ||||||
|  |     0xff00010101ff0000, 0xff0001010100ff00, 0xff00010101000001, 0xff00010101000101, | ||||||
|  |     0xff01ffffffffffff, 0xff01ffffffffff01, 0xff01ffffffff01ff, 0xff01ffffffff0101, | ||||||
|  |     0xff01ffffff000000, 0xff01ffffff01ffff, 0xff01ffffff01ff01, 0xff01ffffff010000, | ||||||
|  |     0xff01ffffff0101ff, 0xff01ffffff010101, 0xff01ffff00ff0000, 0xff01ffff0000ff00, | ||||||
|  |     0xff01ffff00000100, 0xff01ffff0001ff00, 0xff01ffff00010000, 0xff01ffff01ffffff, | ||||||
|  |     0xff01ffff01ffff01, 0xff01ffff01ff01ff, 0xff01ffff01ff0101, 0xff01ffff01000000, | ||||||
|  |     0xff01ffff0101ffff, 0xff01ffff0101ff01, 0xff01ffff01010000, 0xff01ffff010101ff, | ||||||
|  |     0xff01ffff01010101, 0xff01ff00ffff0000, 0xff01ff00ff00ff00, 0xff01ff00ff0000ff, | ||||||
|  |     0xff01ff00ff000100, 0xff01ff00ff010000, 0xff01ff0000ffff01, 0xff01ff0000ff00ff, | ||||||
|  |     0xff01ff0000ff0100, 0xff01ff0000000000, 0xff01ff00000001ff, 0xff01ff0000000101, | ||||||
|  |     0xff01ff000001ff00, 0xff01ff00000100ff, 0xff01ff0000010000, 0xff01ff0000010001, | ||||||
|  |     0xff01ff0001ff0000, 0xff01ff000100ffff, 0xff01ff0001000001, 0xff01ff0001000100, | ||||||
|  |     0xff01ff0001010000, 0xff01ff01ffffff00, 0xff01ff01ffff01ff, 0xff01ff01ffff0101, | ||||||
|  |     0xff01ff01ff00ff00, 0xff01ff01ff000000, 0xff01ff01ff01ffff, 0xff01ff01ff01ff01, | ||||||
|  |     0xff01ff01ff0101ff, 0xff01ff01ff010101, 0xff01ff0100ff0000, 0xff01ff010000ff00, | ||||||
|  |     0xff01ff0100000001, 0xff01ff0100000100, 0xff01ff0100010000, 0xff01ff0101ffff00, | ||||||
|  |     0xff01ff0101ff01ff, 0xff01ff0101ff0101, 0xff01ff010100ff00, 0xff01ff0101000000, | ||||||
|  |     0xff01ff010101ffff, 0xff01ff010101ff01, 0xff01ff01010101ff, 0xff01ff0101010101, | ||||||
|  |     0xff0100ffffff0000, 0xff0100ffff0000ff, 0xff0100ffff000001, 0xff0100ffff000100, | ||||||
|  |     0xff0100ffff010000, 0xff0100ff00ff00ff, 0xff0100ff00ff0000, 0xff0100ff00ff0001, | ||||||
|  |     0xff0100ff00ff0100, 0xff0100ff0000ff01, 0xff0100ff00000000, 0xff0100ff000001ff, | ||||||
|  |     0xff0100ff00000101, 0xff0100ff00010001, 0xff0100ff01ff0000, 0xff0100ff0100ff00, | ||||||
|  |     0xff0100ff010000ff, 0xff0100ff01000100, 0xff0100ff0101ff00, 0xff0100ff01010000, | ||||||
|  |     0xff010000ffff0100, 0xff010000ff000000, 0xff010000ff01ff00, 0xff010000ff010100, | ||||||
|  |     0xff01000000ffffff, 0xff01000000ff0000, 0xff01000000ff01ff, 0xff0100000000ff00, | ||||||
|  |     0xff010000000000ff, 0xff01000000000000, 0xff01000000000100, 0xff0100000001ff01, | ||||||
|  |     0xff01000000010000, 0xff010000000101ff, 0xff01000001ff0100, 0xff0100000100ffff, | ||||||
|  |     0xff010000010000ff, 0xff01000001000000, 0xff010000010001ff, 0xff01000001000101, | ||||||
|  |     0xff0100000101ff00, 0xff010000010100ff, 0xff01000001010001, 0xff01000001010100, | ||||||
|  |     0xff010001ffff0000, 0xff010001ff00ffff, 0xff010001ff00ff01, 0xff010001ff000100, | ||||||
|  |     0xff010001ff010000, 0xff01000100ffff00, 0xff01000100ff0100, 0xff01000100000000, | ||||||
|  |     0xff0100010001ffff, 0xff0100010001ff00, 0xff01000100010100, 0xff01000101ff00ff, | ||||||
|  |     0xff01000101ff0001, 0xff0100010100ffff, 0xff01000101000101, 0xff0101ffffffffff, | ||||||
|  |     0xff0101ffffffff01, 0xff0101ffffff01ff, 0xff0101ffffff0101, 0xff0101ffff000000, | ||||||
|  |     0xff0101ffff01ffff, 0xff0101ffff01ff01, 0xff0101ffff0101ff, 0xff0101ffff010101, | ||||||
|  |     0xff0101ff00ff0000, 0xff0101ff0000ff00, 0xff0101ff000000ff, 0xff0101ff00010000, | ||||||
|  |     0xff0101ff01ffffff, 0xff0101ff01ffff01, 0xff0101ff01ff01ff, 0xff0101ff01ff0101, | ||||||
|  |     0xff0101ff0101ffff, 0xff0101ff0101ff01, 0xff0101ff010101ff, 0xff0101ff01010101, | ||||||
|  |     0xff010100ffff0100, 0xff010100ff00ff00, 0xff010100ff0000ff, 0xff010100ff000100, | ||||||
|  |     0xff010100ff010000, 0xff01010000ff0001, 0xff01010000ff0100, 0xff0101000000ff01, | ||||||
|  |     0xff01010000000000, 0xff0101000001ff00, 0xff010100000100ff, 0xff01010000010001, | ||||||
|  |     0xff01010000010100, 0xff01010001ff0000, 0xff0101000100ffff, 0xff01010001000001, | ||||||
|  |     0xff01010001000100, 0xff010100010100ff, 0xff01010001010000, 0xff010101ffffffff, | ||||||
|  |     0xff010101ffffff01, 0xff010101ffff01ff, 0xff010101ffff0101, 0xff010101ff01ffff, | ||||||
|  |     0xff010101ff01ff01, 0xff010101ff0101ff, 0xff010101ff010101, 0xff01010100ff0000, | ||||||
|  |     0xff0101010000ff00, 0xff01010100000001, 0xff01010100000100, 0xff01010100010000, | ||||||
|  |     0xff01010101ffffff, 0xff01010101ffff01, 0xff01010101ff01ff, 0xff01010101ff0101, | ||||||
|  |     0xff01010101000000, 0xff0101010101ffff, 0xff0101010101ff01, 0xff010101010101ff, | ||||||
|  |     0xff01010101010101, 0x00ffffffffff0000, 0x00ffffffff00ff00, 0x00ffffffff000001, | ||||||
|  |     0x00ffffffff010000, 0x00ffffff00ff0100, 0x00ffffff0000ff01, 0x00ffffff00000000, | ||||||
|  |     0x00ffffff000001ff, 0x00ffffff00000101, 0x00ffffff0001ff00, 0x00ffffff000100ff, | ||||||
|  |     0x00ffffff00010001, 0x00ffffff010000ff, 0x00ffffff01000100, 0x00ffffff0101ff00, | ||||||
|  |     0x00ffffff01010001, 0x00ffff00ffffffff, 0x00ffff00ffffff00, 0x00ffff00ffff00ff, | ||||||
|  |     0x00ffff00ffff0001, 0x00ffff00ffff0100, 0x00ffff00ff00ff01, 0x00ffff00ff000000, | ||||||
|  |     0x00ffff00ff000001, 0x00ffff00ff0001ff, 0x00ffff00ff000101, 0x00ffff00ff01ff00, | ||||||
|  |     0x00ffff00ff010001, 0x00ffff00ff010100, 0x00ffff0000ff0000, 0x00ffff0000ff01ff, | ||||||
|  |     0x00ffff0000ff0101, 0x00ffff000000ff00, 0x00ffff00000000ff, 0x00ffff0000000000, | ||||||
|  |     0x00ffff0000000001, 0x00ffff0000000100, 0x00ffff0000000101, 0x00ffff0000010000, | ||||||
|  |     0x00ffff00000101ff, 0x00ffff0000010101, 0x00ffff0001ffff00, 0x00ffff0001ff00ff, | ||||||
|  |     0x00ffff0001ff0001, 0x00ffff000100ffff, 0x00ffff000100ff01, 0x00ffff0001000000, | ||||||
|  |     0x00ffff000101ffff, 0x00ffff000101ff00, 0x00ffff000101ff01, 0x00ffff01ffff0000, | ||||||
|  |     0x00ffff01ff00ff00, 0x00ffff01ff0000ff, 0x00ffff01ff000001, 0x00ffff01ff010000, | ||||||
|  |     0x00ffff0100ffff00, 0x00ffff010000ff01, 0x00ffff0100000000, 0x00ffff0100000101, | ||||||
|  |     0x00ffff01000100ff, 0x00ffff0100010100, 0x00ffff0101ff0100, 0x00ffff01010000ff, | ||||||
|  |     0x00ffff0101010000, 0x00ff00ffffffff00, 0x00ff00ffff000000, 0x00ff00ffff000100, | ||||||
|  |     0x00ff00ffff010100, 0x00ff00ff00ff0000, 0x00ff00ff00ff01ff, 0x00ff00ff00ff0101, | ||||||
|  |     0x00ff00ff0000ff00, 0x00ff00ff000000ff, 0x00ff00ff00000000, 0x00ff00ff00000001, | ||||||
|  |     0x00ff00ff0001ff00, 0x00ff00ff0001ff01, 0x00ff00ff00010000, 0x00ff00ff000101ff, | ||||||
|  |     0x00ff00ff00010101, 0x00ff00ff01ffff00, 0x00ff00ff01ff0001, 0x00ff00ff01ff0100, | ||||||
|  |     0x00ff00ff0100ffff, 0x00ff00ff0100ff01, 0x00ff00ff01000000, 0x00ff00ff0101ffff, | ||||||
|  |     0x00ff00ff0101ff00, 0x00ff00ff01010100, 0x00ff0000ffffff00, 0x00ff0000ffffff01, | ||||||
|  |     0x00ff0000ffff0000, 0x00ff0000ffff0101, 0x00ff0000ff00ff00, 0x00ff0000ff0000ff, | ||||||
|  |     0x00ff0000ff000000, 0x00ff0000ff000001, 0x00ff0000ff000100, 0x00ff0000ff01ffff, | ||||||
|  |     0x00ff0000ff010000, 0x00ff0000ff010101, 0x00ff000000ffff00, 0x00ff000000ff00ff, | ||||||
|  |     0x00ff000000ff0000, 0x00ff000000ff0001, 0x00ff000000ff0100, 0x00ff00000000ffff, | ||||||
|  |     0x00ff00000000ff00, 0x00ff0000000000ff, 0x00ff000000000000, 0x00ff000000000001, | ||||||
|  |     0x00ff0000000001ff, 0x00ff000000000100, 0x00ff00000001ff00, 0x00ff0000000100ff, | ||||||
|  |     0x00ff000000010000, 0x00ff000000010001, 0x00ff000000010100, 0x00ff000001ffff01, | ||||||
|  |     0x00ff000001ff00ff, 0x00ff000001ff0000, 0x00ff000001ff01ff, 0x00ff00000100ff00, | ||||||
|  |     0x00ff0000010000ff, 0x00ff000001000000, 0x00ff000001000001, 0x00ff000001000100, | ||||||
|  |     0x00ff000001000101, 0x00ff000001010000, 0x00ff0000010101ff, 0x00ff000001010101, | ||||||
|  |     0x00ff0001ffffff00, 0x00ff0001ffff0000, 0x00ff0001ffff0100, 0x00ff0001ff0000ff, | ||||||
|  |     0x00ff0001ff000000, 0x00ff0001ff0001ff, 0x00ff0001ff000101, 0x00ff0001ff01ff00, | ||||||
|  |     0x00ff0001ff0100ff, 0x00ff0001ff010100, 0x00ff000100ffffff, 0x00ff000100ffff01, | ||||||
|  |     0x00ff000100ff0000, 0x00ff000100ff01ff, 0x00ff00010000ffff, 0x00ff00010000ff00, | ||||||
|  |     0x00ff00010000ff01, 0x00ff000100000000, 0x00ff000100000001, 0x00ff000100000100, | ||||||
|  |     0x00ff00010001ff01, 0x00ff000100010000, 0x00ff0001000101ff, 0x00ff000101ffff00, | ||||||
|  |     0x00ff000101ff0000, 0x00ff000101ff0101, 0x00ff0001010000ff, 0x00ff000101000000, | ||||||
|  |     0x00ff00010101ff00, 0x00ff0001010100ff, 0x00ff000101010001, 0x00ff01ffffff0000, | ||||||
|  |     0x00ff01ffff00ff00, 0x00ff01ffff000000, 0x00ff01ffff000101, 0x00ff01ffff010000, | ||||||
|  |     0x00ff01ff00ffff01, 0x00ff01ff00ff0100, 0x00ff01ff0000ffff, 0x00ff01ff00000000, | ||||||
|  |     0x00ff01ff000001ff, 0x00ff01ff0001ff00, 0x00ff01ff000100ff, 0x00ff01ff00010001, | ||||||
|  |     0x00ff01ff00010100, 0x00ff01ff01ff0000, 0x00ff01ff0100ff00, 0x00ff01ff010000ff, | ||||||
|  |     0x00ff01ff01000001, 0x00ff01ff01000100, 0x00ff01ff01010000, 0x00ff0100ffffff00, | ||||||
|  |     0x00ff0100ffff0000, 0x00ff0100ffff0001, 0x00ff0100ffff0101, 0x00ff0100ff00ffff, | ||||||
|  |     0x00ff0100ff0000ff, 0x00ff0100ff000000, 0x00ff0100ff0001ff, 0x00ff0100ff01ff00, | ||||||
|  |     0x00ff0100ff0100ff, 0x00ff0100ff010001, 0x00ff010000ffffff, 0x00ff010000ff0000, | ||||||
|  |     0x00ff010000ff0101, 0x00ff01000000ff00, 0x00ff01000000ff01, 0x00ff0100000000ff, | ||||||
|  |     0x00ff010000000000, 0x00ff010000000001, 0x00ff010000000100, 0x00ff01000001ffff, | ||||||
|  |     0x00ff01000001ff01, 0x00ff010000010000, 0x00ff010000010001, 0x00ff010000010101, | ||||||
|  |     0x00ff010001ff0001, 0x00ff010001ff0100, 0x00ff01000100ff01, 0x00ff010001000000, | ||||||
|  |     0x00ff010001000001, 0x00ff0100010001ff, 0x00ff01000101ff00, 0x00ff0100010100ff, | ||||||
|  |     0x00ff010001010001, 0x00ff010001010100, 0x00ff0101ff000001, 0x00ff010100ff00ff, | ||||||
|  |     0x00ff010100ff0001, 0x00ff010100ff0100, 0x00ff010100000000, 0x00ff0101000001ff, | ||||||
|  |     0x00ff010100000101, 0x00ff0101000100ff, 0x00ff010100010100, 0x00ff0101010000ff, | ||||||
|  |     0x00ff010101010000, 0x0000ffffffffff00, 0x0000ffffffff00ff, 0x0000ffffffff0000, | ||||||
|  |     0x0000ffffffff0001, 0x0000ffffffff0100, 0x0000ffffff00ff01, 0x0000ffffff000000, | ||||||
|  |     0x0000ffffff000101, 0x0000ffffff01ff00, 0x0000ffffff0100ff, 0x0000ffffff010100, | ||||||
|  |     0x0000ffff00ffffff, 0x0000ffff00ff0000, 0x0000ffff00ff01ff, 0x0000ffff0000ff00, | ||||||
|  |     0x0000ffff000000ff, 0x0000ffff00000000, 0x0000ffff00000001, 0x0000ffff00000100, | ||||||
|  |     0x0000ffff00010000, 0x0000ffff000101ff, 0x0000ffff01ff0001, 0x0000ffff01ff0100, | ||||||
|  |     0x0000ffff01000000, 0x0000ffff010001ff, 0x0000ffff0101ffff, 0x0000ffff0101ff00, | ||||||
|  |     0x0000ffff01010001, 0x0000ffff01010100, 0x0000ff00ffff0000, 0x0000ff00ffff01ff, | ||||||
|  |     0x0000ff00ffff0100, 0x0000ff00ffff0101, 0x0000ff00ff00ff00, 0x0000ff00ff0000ff, | ||||||
|  |     0x0000ff00ff000000, 0x0000ff00ff000001, 0x0000ff00ff0001ff, 0x0000ff00ff000100, | ||||||
|  |     0x0000ff00ff01ffff, 0x0000ff00ff010000, 0x0000ff00ff010001, 0x0000ff00ff0101ff, | ||||||
|  |     0x0000ff00ff010101, 0x0000ff0000ffff00, 0x0000ff0000ff00ff, 0x0000ff0000ff0000, | ||||||
|  |     0x0000ff0000ff0001, 0x0000ff0000ff0100, 0x0000ff000000ffff, 0x0000ff000000ff00, | ||||||
|  |     0x0000ff000000ff01, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001, | ||||||
|  |     0x0000ff00000001ff, 0x0000ff0000000100, 0x0000ff0000000101, 0x0000ff000001ff00, | ||||||
|  |     0x0000ff00000100ff, 0x0000ff0000010000, 0x0000ff0000010001, 0x0000ff0000010100, | ||||||
|  |     0x0000ff0001ffff01, 0x0000ff0001ff0000, 0x0000ff000100ff00, 0x0000ff00010000ff, | ||||||
|  |     0x0000ff0001000000, 0x0000ff0001000001, 0x0000ff0001000100, 0x0000ff000101ffff, | ||||||
|  |     0x0000ff0001010000, 0x0000ff0001010101, 0x0000ff01ffffff00, 0x0000ff01ffff0001, | ||||||
|  |     0x0000ff01ff00ff01, 0x0000ff01ff000000, 0x0000ff01ff000101, 0x0000ff01ff01ff00, | ||||||
|  |     0x0000ff01ff0100ff, 0x0000ff0100ffff01, 0x0000ff0100ff0000, 0x0000ff0100ff0101, | ||||||
|  |     0x0000ff010000ff00, 0x0000ff01000000ff, 0x0000ff0100000000, 0x0000ff0100000001, | ||||||
|  |     0x0000ff0100000100, 0x0000ff010001ff01, 0x0000ff0100010000, 0x0000ff0101ff0000, | ||||||
|  |     0x0000ff010100ffff, 0x0000ff010100ff01, 0x0000ff0101000000, 0x0000ff0101000100, | ||||||
|  |     0x0000ff0101000101, 0x0000ff01010100ff, 0x000000ffffff00ff, 0x000000ffffff0000, | ||||||
|  |     0x000000ffff00ff00, 0x000000ffff0000ff, 0x000000ffff000000, 0x000000ffff000001, | ||||||
|  |     0x000000ffff0001ff, 0x000000ffff000100, 0x000000ffff01ff00, 0x000000ffff010000, | ||||||
|  |     0x000000ffff0101ff, 0x000000ffff010101, 0x000000ff00ffff00, 0x000000ff00ff00ff, | ||||||
|  |     0x000000ff00ff0000, 0x000000ff00ff0001, 0x000000ff00ff0100, 0x000000ff00ff0101, | ||||||
|  |     0x000000ff0000ffff, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000, | ||||||
|  |     0x000000ff00000001, 0x000000ff000001ff, 0x000000ff00000100, 0x000000ff00000101, | ||||||
|  |     0x000000ff0001ff00, 0x000000ff0001ff01, 0x000000ff000100ff, 0x000000ff00010000, | ||||||
|  |     0x000000ff00010001, 0x000000ff00010100, 0x000000ff01ffffff, 0x000000ff01ff01ff, | ||||||
|  |     0x000000ff01ff0101, 0x000000ff0100ff00, 0x000000ff010000ff, 0x000000ff01000000, | ||||||
|  |     0x000000ff01000001, 0x000000ff01000100, 0x000000ff0101ff00, 0x000000ff010100ff, | ||||||
|  |     0x000000ff01010000, 0x000000ff01010101, 0x00000000ffffff00, 0x00000000ffffff01, | ||||||
|  |     0x00000000ffff00ff, 0x00000000ffff0000, 0x00000000ffff0001, 0x00000000ffff0100, | ||||||
|  |     0x00000000ff00ffff, 0x00000000ff00ff00, 0x00000000ff00ff01, 0x00000000ff0000ff, | ||||||
|  |     0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff000101, | ||||||
|  |     0x00000000ff01ff00, 0x00000000ff0100ff, 0x00000000ff010000, 0x00000000ff010001, | ||||||
|  |     0x00000000ff010100, 0x0000000000ffffff, 0x0000000000ffff00, 0x0000000000ffff01, | ||||||
|  |     0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001, 0x0000000000ff01ff, | ||||||
|     0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01, |     0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01, | ||||||
|     0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100, |     0x00000000000000ff, 0x0000000000000000, 0x0000000000000001, 0x00000000000001ff, | ||||||
|     0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000, |     0x0000000000000100, 0x0000000000000101, 0x000000000001ffff, 0x000000000001ff00, | ||||||
|     0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00, |     0x00000000000100ff, 0x0000000000010000, 0x0000000000010001, 0x00000000000101ff, | ||||||
|     0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100, |     0x0000000000010100, 0x0000000000010101, 0x0000000001ffff00, 0x0000000001ff00ff, | ||||||
|     0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000, |     0x0000000001ff0000, 0x0000000001ff0100, 0x0000000001ff0101, 0x000000000100ffff, | ||||||
|     0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001, |     0x000000000100ff00, 0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, | ||||||
|     0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00, |     0x00000000010001ff, 0x0000000001000100, 0x000000000101ff00, 0x00000000010100ff, | ||||||
|     0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff, |     0x0000000001010000, 0x0000000001010001, 0x0000000001010100, 0x00000001ffffffff, | ||||||
|     0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100, |     0x00000001ffffff00, 0x00000001ffffff01, 0x00000001ffff00ff, 0x00000001ffff0001, | ||||||
|     0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff, |     0x00000001ffff01ff, 0x00000001ffff0100, 0x00000001ff00ff00, 0x00000001ff0000ff, | ||||||
|     0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000, |     0x00000001ff000000, 0x00000001ff0001ff, 0x00000001ff000100, 0x00000001ff01ffff, | ||||||
|     0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff, |     0x00000001ff01ff00, 0x00000001ff01ff01, 0x00000001ff0100ff, 0x00000001ff010000, | ||||||
|     0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff, |     0x00000001ff010001, 0x00000001ff0101ff, 0x00000001ff010100, 0x0000000100ffff00, | ||||||
|     0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00, |     0x0000000100ff0000, 0x0000000100ff0001, 0x0000000100ff01ff, 0x0000000100ff0100, | ||||||
|     0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001, |     0x0000000100ff0101, 0x000000010000ffff, 0x000000010000ff00, 0x000000010000ff01, | ||||||
|     0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001, |     0x00000001000000ff, 0x0000000100000000, 0x0000000100000001, 0x00000001000001ff, | ||||||
|     0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01, |     0x0000000100000100, 0x0000000100000101, 0x000000010001ff00, 0x00000001000100ff, | ||||||
|     0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000, |     0x0000000100010000, 0x0000000100010100, 0x0000000101ffff01, 0x0000000101ff0000, | ||||||
|     0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101, |     0x0000000101ff0001, 0x0000000101ff01ff, 0x0000000101ff0100, 0x0000000101ff0101, | ||||||
|     0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00, |     0x000000010100ff00, 0x0000000101000000, 0x0000000101000101, 0x000000010101ff01, | ||||||
|     0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100, |     0x0000000101010000, 0x0000000101010001, 0x00000001010101ff, 0x0000000101010100, | ||||||
|     0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101, |     0x000001ffffff00ff, 0x000001ffffff0000, 0x000001ffffff0001, 0x000001ffffff0100, | ||||||
|     0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101, |     0x000001ffff00ffff, 0x000001ffff000000, 0x000001ffff0001ff, 0x000001ffff01ff00, | ||||||
|     0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000, |     0x000001ffff010101, 0x000001ff00ff0000, 0x000001ff00ff01ff, 0x000001ff00ff0101, | ||||||
|     0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff, |     0x000001ff0000ff00, 0x000001ff000000ff, 0x000001ff00000000, 0x000001ff00000001, | ||||||
|     0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff, |     0x000001ff000001ff, 0x000001ff00000100, 0x000001ff0001ffff, 0x000001ff0001ff01, | ||||||
|     0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101, |     0x000001ff000100ff, 0x000001ff00010000, 0x000001ff01ffff01, 0x000001ff01ff0100, | ||||||
|     0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff, |     0x000001ff0100ffff, 0x000001ff0100ff01, 0x000001ff01000000, 0x000001ff010001ff, | ||||||
|     0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101, |     0x000001ff0101ff00, 0x000001ff01010100, 0x00000100ffffff00, 0x00000100ffffff01, | ||||||
|     0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001, |     0x00000100ffff0000, 0x00000100ffff0101, 0x00000100ff00ff00, 0x00000100ff0000ff, | ||||||
|     0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff, |     0x00000100ff000000, 0x00000100ff000001, 0x00000100ff000100, 0x00000100ff010000, | ||||||
|     0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff, |     0x0000010000ffff00, 0x0000010000ff00ff, 0x0000010000ff0000, 0x0000010000ff0001, | ||||||
|     0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01, |     0x0000010000ff0100, 0x000001000000ffff, 0x000001000000ff00, 0x000001000000ff01, | ||||||
|     0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff, |     0x00000100000000ff, 0x0000010000000000, 0x0000010000000001, 0x00000100000001ff, | ||||||
|     0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100, |     0x0000010000000100, 0x0000010000000101, 0x000001000001ff00, 0x00000100000100ff, | ||||||
|     0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001, |     0x0000010000010000, 0x0000010000010001, 0x0000010000010100, 0x0000010001ffff00, | ||||||
|     0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00, |     0x0000010001ff0000, 0x0000010001ff0100, 0x000001000100ff00, 0x00000100010000ff, | ||||||
|     0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff, |     0x0000010001000000, 0x0000010001000001, 0x00000100010001ff, 0x0000010001000100, | ||||||
|     0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff, |     0x0000010001010000, 0x00000101ffff00ff, 0x00000101ffff01ff, 0x00000101ff000000, | ||||||
|     0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000, |     0x00000101ff000101, 0x00000101ff01ffff, 0x00000101ff010000, 0x00000101ff010001, | ||||||
|     0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000, |     0x00000101ff010100, 0x0000010100ff0000, 0x0000010100ff01ff, 0x0000010100ff0100, | ||||||
|     0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101, |     0x000001010000ff00, 0x0000010100000000, 0x0000010100000001, 0x00000101000001ff, | ||||||
|     0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001, |     0x0000010100000100, 0x000001010001ff01, 0x0000010100010000, 0x00000101000101ff, | ||||||
|     0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000, |     0x0000010100010101, 0x0000010101ffff00, 0x0000010101ff0101, 0x000001010100ff01, | ||||||
|     0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101, |     0x0000010101000000, 0x0000010101000001, 0x00000101010001ff, 0x0000010101000101, | ||||||
|     0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000, |     0x000001010101ff00, 0x0001ffffffff0000, 0x0001ffffff0000ff, 0x0001ffffff000001, | ||||||
|     0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001, |     0x0001ffffff000100, 0x0001ffffff010000, 0x0001ffff00ff00ff, 0x0001ffff0000ffff, | ||||||
|     0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000, |     0x0001ffff00000000, 0x0001ffff00000001, 0x0001ffff000001ff, 0x0001ffff00000101, | ||||||
|     0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100, |     0x0001ffff0001ff00, 0x0001ffff000100ff, 0x0001ffff00010001, 0x0001ffff00010100, | ||||||
|     0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000, |     0x0001ffff01ffff00, 0x0001ffff01000001, 0x0001ffff01010000, 0x0001ff00ffffff00, | ||||||
|     0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000, |     0x0001ff00ffff00ff, 0x0001ff00ffff0001, 0x0001ff00ffff0100, 0x0001ff00ff00ff01, | ||||||
|     0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100, |     0x0001ff00ff000000, 0x0001ff00ff01ff00, 0x0001ff00ff01ff01, 0x0001ff00ff010001, | ||||||
|     0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff, |     0x0001ff00ff010100, 0x0001ff0000ff0000, 0x0001ff0000ff0100, 0x0001ff000000ff00, | ||||||
|     0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff, |     0x0001ff0000000000, 0x0001ff0000000001, 0x0001ff0000000100, 0x0001ff0000010000, | ||||||
|     0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00, |     0x0001ff0000010001, 0x0001ff0000010101, 0x0001ff0001ff00ff, 0x0001ff0001ff0101, | ||||||
|     0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101, |     0x0001ff000100ff01, 0x0001ff0001000000, 0x0001ff000101ff00, 0x0001ff0001010001, | ||||||
|     0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000, |     0x0001ff0001010100, 0x0001ff01ff00ff00, 0x0001ff01ff000001, 0x0001ff01ff000100, | ||||||
|     0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00, |     0x0001ff0100ffffff, 0x0001ff0100ffff00, 0x0001ff0100ff0001, 0x0001ff0100000000, | ||||||
|     0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000, |     0x0001ff0100000001, 0x0001ff01000001ff, 0x0001ff010001ffff, 0x0001ff0101ff0000, | ||||||
|     0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff, |     0x0001ff010100ff00, 0x0001ff0101000001, 0x0001ff0101010000, 0x000100ffff00ff00, | ||||||
|     0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101, |     0x000100ffff00ff01, 0x000100ffff000000, 0x000100ffff000001, 0x000100ffff000101, | ||||||
|     0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff, |     0x000100ffff01ff00, 0x000100ffff010001, 0x000100ffff010100, 0x000100ff00ffffff, | ||||||
|     0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00, |     0x000100ff00ffff01, 0x000100ff00ff0000, 0x000100ff00ff01ff, 0x000100ff00ff0101, | ||||||
|     0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff, |     0x000100ff0000ff00, 0x000100ff000000ff, 0x000100ff00000000, 0x000100ff00000001, | ||||||
|  |     0x000100ff00000100, 0x000100ff00000101, 0x000100ff0001ffff, 0x000100ff0001ff01, | ||||||
|  |     0x000100ff00010000, 0x000100ff01ff00ff, 0x000100ff01ff0000, 0x000100ff01ff0100, | ||||||
|  |     0x000100ff0100ffff, 0x000100ff0100ff01, 0x000100ff010000ff, 0x000100ff01000000, | ||||||
|  |     0x000100ff01000001, 0x000100ff010001ff, 0x000100ff01000101, 0x000100ff0101ff00, | ||||||
|  |     0x000100ff010100ff, 0x000100ff01010100, 0x00010000ffff0000, 0x00010000ffff01ff, | ||||||
|  |     0x00010000ffff0101, 0x00010000ff00ff00, 0x00010000ff000000, 0x00010000ff000001, | ||||||
|  |     0x00010000ff000100, 0x0001000000ff00ff, 0x0001000000ff0000, 0x0001000000ff0001, | ||||||
|  |     0x0001000000ff0100, 0x000100000000ffff, 0x000100000000ff00, 0x00010000000000ff, | ||||||
|  |     0x0001000000000000, 0x0001000000000001, 0x0001000000000100, 0x000100000001ff00, | ||||||
|  |     0x00010000000100ff, 0x0001000000010000, 0x0001000000010001, 0x0001000000010100, | ||||||
|  |     0x0001000001ff0001, 0x0001000001ff0100, 0x0001000001ff0101, 0x000100000100ff00, | ||||||
|  |     0x0001000001000000, 0x0001000001000001, 0x0001000001000100, 0x0001000001000101, | ||||||
|  |     0x000100000101ff01, 0x0001000001010000, 0x0001000001010001, 0x00010000010101ff, | ||||||
|  |     0x00010001ffffff01, 0x00010001ffff0100, 0x00010001ff000000, 0x00010001ff01ffff, | ||||||
|  |     0x00010001ff010001, 0x00010001ff0101ff, 0x00010001ff010100, 0x0001000100ffffff, | ||||||
|  |     0x0001000100ff0000, 0x0001000100ff01ff, 0x0001000100ff0101, 0x000100010000ff00, | ||||||
|  |     0x00010001000000ff, 0x0001000100000000, 0x0001000100000001, 0x00010001000001ff, | ||||||
|  |     0x0001000100000101, 0x000100010001ffff, 0x0001000100010000, 0x00010001000101ff, | ||||||
|  |     0x0001000101ffffff, 0x0001000101ffff01, 0x0001000101ff0000, 0x0001000101ff0101, | ||||||
|  |     0x00010001010000ff, 0x0001000101000001, 0x00010001010001ff, 0x0001000101000100, | ||||||
|  |     0x000100010101ffff, 0x00010001010100ff, 0x0001000101010001, 0x0001000101010101, | ||||||
|  |     0x000101ffff000001, 0x000101ffff000100, 0x000101ffff010000, 0x000101ff00ffff00, | ||||||
|  |     0x000101ff0000ff01, 0x000101ff00000000, 0x000101ff00000101, 0x000101ff0001ff00, | ||||||
|  |     0x000101ff00010100, 0x000101ff01ff0000, 0x000101ff0100ff00, 0x000101ff010001ff, | ||||||
|  |     0x000101ff01010001, 0x00010100ffffff00, 0x00010100ffff00ff, 0x00010100ff00ffff, | ||||||
|  |     0x00010100ff000000, 0x00010100ff01ff00, 0x00010100ff0100ff, 0x00010100ff010001, | ||||||
|  |     0x00010100ff010100, 0x0001010000ffffff, 0x0001010000ffff00, 0x0001010000ff0000, | ||||||
|  |     0x0001010000ff0001, 0x0001010000ff01ff, 0x000101000000ff00, 0x00010100000000ff, | ||||||
|  |     0x0001010000000000, 0x0001010000000001, 0x0001010000000100, 0x000101000001ffff, | ||||||
|  |     0x0001010000010000, 0x0001010000010101, 0x0001010001ffff01, 0x0001010001ff00ff, | ||||||
|  |     0x0001010001ff0101, 0x0001010001000000, 0x000101000101ff00, 0x00010100010100ff, | ||||||
|  |     0x0001010001010000, 0x0001010001010100, 0x00010101ff00ff00, 0x00010101ff000001, | ||||||
|  |     0x00010101ff0001ff, 0x0001010100ffff00, 0x0001010100ff00ff, 0x0001010100ff0100, | ||||||
|  |     0x000101010000ffff, 0x0001010100000000, 0x00010101000001ff, 0x0001010100000101, | ||||||
|  |     0x00010101000100ff, 0x0001010100010000, 0x0001010100010100, 0x0001010101ff0001, | ||||||
|  |     0x00010101010000ff, 0x00010101010001ff, 0x0001010101000101, 0x0001010101010001, | ||||||
|  |     0x01ffffffffffffff, 0x01ffffffffffff01, 0x01ffffffffff01ff, 0x01ffffffffff0101, | ||||||
|  |     0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff, 0x01ffffffff010101, | ||||||
|  |     0x01ffffff00ff0000, 0x01ffffff0000ffff, 0x01ffffff0000ff00, 0x01ffffff000000ff, | ||||||
|  |     0x01ffffff00000001, 0x01ffffff00000100, 0x01ffffff00010000, 0x01ffffff01ffffff, | ||||||
|  |     0x01ffffff01ffff01, 0x01ffffff01ff01ff, 0x01ffffff01ff0101, 0x01ffffff01000000, | ||||||
|  |     0x01ffffff0101ffff, 0x01ffffff0101ff01, 0x01ffffff010101ff, 0x01ffffff01010101, | ||||||
|  |     0x01ffff00ffff0000, 0x01ffff00ff00ff00, 0x01ffff00ff0000ff, 0x01ffff00ff000001, | ||||||
|  |     0x01ffff00ff000100, 0x01ffff00ff010000, 0x01ffff0000ffff00, 0x01ffff0000ff00ff, | ||||||
|  |     0x01ffff0000ff0100, 0x01ffff000000ffff, 0x01ffff000000ff01, 0x01ffff0000000000, | ||||||
|  |     0x01ffff0000000001, 0x01ffff00000001ff, 0x01ffff0000000100, 0x01ffff00000100ff, | ||||||
|  |     0x01ffff0000010001, 0x01ffff0000010100, 0x01ffff0001ff0000, 0x01ffff0001ff0100, | ||||||
|  |     0x01ffff00010000ff, 0x01ffff0001000001, 0x01ffff0001000100, 0x01ffff0001010000, | ||||||
|  |     0x01ffff01ffffffff, 0x01ffff01ffffff01, 0x01ffff01ffff01ff, 0x01ffff01ffff0101, | ||||||
|  |     0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff01ff01, 0x01ffff01ff0101ff, | ||||||
|  |     0x01ffff01ff010101, 0x01ffff010000ff00, 0x01ffff01000000ff, 0x01ffff0100000100, | ||||||
|  |     0x01ffff0100010000, 0x01ffff0101ffffff, 0x01ffff0101ffff01, 0x01ffff0101ff01ff, | ||||||
|  |     0x01ffff0101ff0101, 0x01ffff0101000000, 0x01ffff010101ffff, 0x01ffff010101ff01, | ||||||
|  |     0x01ffff01010101ff, 0x01ffff0101010101, 0x01ff00ffff0000ff, 0x01ff00ffff000100, | ||||||
|  |     0x01ff00ff00ffff00, 0x01ff00ff00ff00ff, 0x01ff00ff0000ff00, 0x01ff00ff00000000, | ||||||
|  |     0x01ff00ff00000101, 0x01ff00ff0001ff00, 0x01ff00ff000100ff, 0x01ff00ff00010100, | ||||||
|  |     0x01ff00ff010000ff, 0x01ff00ff01000100, 0x01ff0000ffffff00, 0x01ff0000ffff0100, | ||||||
|  |     0x01ff0000ff00ff01, 0x01ff0000ff000000, 0x01ff0000ff000101, 0x01ff0000ff010001, | ||||||
|  |     0x01ff0000ff010100, 0x01ff000000ffffff, 0x01ff000000ffff00, 0x01ff000000ff0000, | ||||||
|  |     0x01ff000000ff01ff, 0x01ff00000000ff00, 0x01ff0000000000ff, 0x01ff000000000000, | ||||||
|  |     0x01ff000000000001, 0x01ff000000000100, 0x01ff000000000101, 0x01ff000000010000, | ||||||
|  |     0x01ff000000010001, 0x01ff0000000101ff, 0x01ff000000010101, 0x01ff000001ffff00, | ||||||
|  |     0x01ff000001ff00ff, 0x01ff000001ff0001, 0x01ff000001ff0100, 0x01ff00000100ffff, | ||||||
|  |     0x01ff00000100ff01, 0x01ff000001000000, 0x01ff0000010001ff, 0x01ff000001010001, | ||||||
|  |     0x01ff0001ff00ff00, 0x01ff0001ff000001, 0x01ff0001ff000100, 0x01ff0001ff010000, | ||||||
|  |     0x01ff000100ffff00, 0x01ff000100ff00ff, 0x01ff000100ff0100, 0x01ff000100ff0101, | ||||||
|  |     0x01ff00010000ffff, 0x01ff000100000000, 0x01ff000100000100, 0x01ff000100000101, | ||||||
|  |     0x01ff00010001ff00, 0x01ff000100010001, 0x01ff000100010101, 0x01ff000101ff0000, | ||||||
|  |     0x01ff00010100ff00, 0x01ff000101000101, 0x01ff0001010100ff, 0x01ff01ffffffffff, | ||||||
|  |     0x01ff01ffffffff01, 0x01ff01ffffff01ff, 0x01ff01ffffff0101, 0x01ff01ffff000000, | ||||||
|  |     0x01ff01ffff01ffff, 0x01ff01ffff01ff01, 0x01ff01ffff0101ff, 0x01ff01ffff010101, | ||||||
|  |     0x01ff01ff00ffff00, 0x01ff01ff00ff0000, 0x01ff01ff0000ff00, 0x01ff01ff000000ff, | ||||||
|  |     0x01ff01ff00000100, 0x01ff01ff00010000, 0x01ff01ff00010100, 0x01ff01ff01ffffff, | ||||||
|  |     0x01ff01ff01ffff01, 0x01ff01ff01ff01ff, 0x01ff01ff01ff0101, 0x01ff01ff01000000, | ||||||
|  |     0x01ff01ff0101ffff, 0x01ff01ff0101ff01, 0x01ff01ff010101ff, 0x01ff01ff01010101, | ||||||
|  |     0x01ff0100ffff0000, 0x01ff0100ffff0001, 0x01ff0100ff00ff00, 0x01ff0100ff0000ff, | ||||||
|  |     0x01ff0100ff000001, 0x01ff0100ff010000, 0x01ff010000ffff00, 0x01ff010000ff00ff, | ||||||
|  |     0x01ff010000ff0001, 0x01ff010000ff0100, 0x01ff01000000ffff, 0x01ff01000000ff01, | ||||||
|  |     0x01ff010000000000, 0x01ff010000000101, 0x01ff01000001ff00, 0x01ff0100000100ff, | ||||||
|  |     0x01ff010001ff0000, 0x01ff010001000001, 0x01ff010001000100, 0x01ff010001010000, | ||||||
|  |     0x01ff0101ffffffff, 0x01ff0101ffffff01, 0x01ff0101ffff01ff, 0x01ff0101ffff0101, | ||||||
|  |     0x01ff0101ff000000, 0x01ff0101ff01ffff, 0x01ff0101ff01ff01, 0x01ff0101ff0101ff, | ||||||
|  |     0x01ff0101ff010101, 0x01ff010100ff0000, 0x01ff01010000ff00, 0x01ff0101000000ff, | ||||||
|  |     0x01ff010100000001, 0x01ff010101ffffff, 0x01ff010101ffff01, 0x01ff010101ff01ff, | ||||||
|  |     0x01ff010101ff0101, 0x01ff010101000000, 0x01ff01010101ffff, 0x01ff01010101ff01, | ||||||
|  |     0x01ff0101010101ff, 0x01ff010101010101, 0x0100ffffffff0000, 0x0100ffffff00ff00, | ||||||
|  |     0x0100ffffff000001, 0x0100ffffff0001ff, 0x0100ffffff000100, 0x0100ffffff010000, | ||||||
|  |     0x0100ffff00ffff00, 0x0100ffff00ff0001, 0x0100ffff00ff0100, 0x0100ffff00000000, | ||||||
|  |     0x0100ffff000001ff, 0x0100ffff00000101, 0x0100ffff00010100, 0x0100ffff00010101, | ||||||
|  |     0x0100ffff01ff0000, 0x0100ffff0100ff00, 0x0100ffff010000ff, 0x0100ffff01000001, | ||||||
|  |     0x0100ffff01000100, 0x0100ffff01010000, 0x0100ff00ffffff00, 0x0100ff00ffff00ff, | ||||||
|  |     0x0100ff00ffff0001, 0x0100ff00ffff0100, 0x0100ff00ff00ffff, 0x0100ff00ff000000, | ||||||
|  |     0x0100ff00ff0001ff, 0x0100ff00ff000101, 0x0100ff00ff01ff00, 0x0100ff00ff0100ff, | ||||||
|  |     0x0100ff00ff010001, 0x0100ff00ff010100, 0x0100ff0000ffffff, 0x0100ff0000ff0000, | ||||||
|  |     0x0100ff000000ffff, 0x0100ff000000ff00, 0x0100ff00000000ff, 0x0100ff0000000000, | ||||||
|  |     0x0100ff0000000001, 0x0100ff0000000100, 0x0100ff000001ff01, 0x0100ff0000010000, | ||||||
|  |     0x0100ff0001ff00ff, 0x0100ff0001ff0001, 0x0100ff000100ff01, 0x0100ff0001000000, | ||||||
|  |     0x0100ff00010001ff, 0x0100ff000101ff00, 0x0100ff00010100ff, 0x0100ff0001010001, | ||||||
|  |     0x0100ff0001010100, 0x0100ff01ffff0000, 0x0100ff01ff00ff00, 0x0100ff01ff0000ff, | ||||||
|  |     0x0100ff01ff000100, 0x0100ff01ff010000, 0x0100ff0100ff00ff, 0x0100ff0100ff0001, | ||||||
|  |     0x0100ff0100ff0100, 0x0100ff010000ffff, 0x0100ff010000ff01, 0x0100ff0100000000, | ||||||
|  |     0x0100ff01000001ff, 0x0100ff0100010001, 0x0100ff0100010100, 0x0100ff0101ff0000, | ||||||
|  |     0x0100ff01010000ff, 0x0100ff0101000001, 0x0100ff0101010100, 0x010000ffffffff00, | ||||||
|  |     0x010000ffffff00ff, 0x010000ffffff0001, 0x010000ffff00ffff, 0x010000ffff000000, | ||||||
|  |     0x010000ffff0001ff, 0x010000ffff010001, 0x010000ff00ffffff, 0x010000ff00ff0101, | ||||||
|  |     0x010000ff0000ff00, 0x010000ff000000ff, 0x010000ff00000000, 0x010000ff00000001, | ||||||
|  |     0x010000ff000001ff, 0x010000ff00000100, 0x010000ff0001ffff, 0x010000ff0001ff00, | ||||||
|  |     0x010000ff0001ff01, 0x010000ff00010000, 0x010000ff01ff00ff, 0x010000ff01ff0001, | ||||||
|  |     0x010000ff0100ff01, 0x010000ff010000ff, 0x010000ff01000000, 0x010000ff010001ff, | ||||||
|  |     0x010000ff0101ff00, 0x010000ff01010100, 0x01000000ffffffff, 0x01000000ffff0000, | ||||||
|  |     0x01000000ffff01ff, 0x01000000ffff0101, 0x01000000ff00ffff, 0x01000000ff00ff00, | ||||||
|  |     0x01000000ff0000ff, 0x01000000ff000000, 0x01000000ff000001, 0x01000000ff000100, | ||||||
|  |     0x01000000ff01ff00, 0x01000000ff010000, 0x01000000ff010100, 0x01000000ff010101, | ||||||
|  |     0x0100000000ffff00, 0x0100000000ff00ff, 0x0100000000ff0000, 0x0100000000ff0001, | ||||||
|  |     0x0100000000ff0100, 0x010000000000ffff, 0x010000000000ff00, 0x010000000000ff01, | ||||||
|  |     0x01000000000000ff, 0x0100000000000000, 0x0100000000000001, 0x01000000000001ff, | ||||||
|  |     0x0100000000000100, 0x0100000000000101, 0x010000000001ff00, 0x01000000000100ff, | ||||||
|  |     0x0100000000010000, 0x0100000000010001, 0x0100000000010100, 0x0100000001ffff00, | ||||||
|  |     0x0100000001ff0000, 0x0100000001ff01ff, 0x010000000100ff00, 0x010000000100ff01, | ||||||
|  |     0x01000000010000ff, 0x0100000001000000, 0x0100000001000001, 0x0100000001000100, | ||||||
|  |     0x0100000001000101, 0x010000000101ffff, 0x010000000101ff01, 0x0100000001010000, | ||||||
|  |     0x01000000010101ff, 0x0100000001010101, 0x01000001ffffff00, 0x01000001ffff00ff, | ||||||
|  |     0x01000001ff00ffff, 0x01000001ff000000, 0x01000001ff000100, 0x01000001ff01ffff, | ||||||
|  |     0x01000001ff010001, 0x01000001ff010100, 0x0100000100ff0000, 0x0100000100ff01ff, | ||||||
|  |     0x0100000100ff0100, 0x010000010000ff00, 0x010000010000ff01, 0x0100000100000000, | ||||||
|  |     0x0100000100000001, 0x0100000100000100, 0x0100000100010000, 0x01000001000101ff, | ||||||
|  |     0x0100000101ffff01, 0x0100000101ff00ff, 0x0100000101ff0100, 0x0100000101ff0101, | ||||||
|  |     0x010000010100ff01, 0x01000001010000ff, 0x0100000101000000, 0x01000001010100ff, | ||||||
|  |     0x0100000101010001, 0x0100000101010100, 0x010001ffffff0000, 0x010001ffff000001, | ||||||
|  |     0x010001ffff000100, 0x010001ffff010000, 0x010001ff00ffff00, 0x010001ff00ff0001, | ||||||
|  |     0x010001ff0000ffff, 0x010001ff0000ff01, 0x010001ff00000000, 0x010001ff00000001, | ||||||
|  |     0x010001ff00000101, 0x010001ff000100ff, 0x010001ff00010000, 0x010001ff01ff0000, | ||||||
|  |     0x010001ff0100ff00, 0x010001ff01000001, 0x010001ff01000100, 0x010001ff01010000, | ||||||
|  |     0x01000100ffff00ff, 0x01000100ffff0001, 0x01000100ffff0100, 0x01000100ff00ffff, | ||||||
|  |     0x01000100ff00ff01, 0x01000100ff000000, 0x01000100ff0001ff, 0x01000100ff000101, | ||||||
|  |     0x01000100ff01ffff, 0x01000100ff01ff00, 0x01000100ff0100ff, 0x01000100ff010001, | ||||||
|  |     0x0100010000ffffff, 0x0100010000ffff01, 0x0100010000ff0000, 0x0100010000ff01ff, | ||||||
|  |     0x0100010000ff0101, 0x010001000000ff00, 0x01000100000000ff, 0x0100010000000000, | ||||||
|  |     0x0100010000000001, 0x0100010000000100, 0x010001000001ff01, 0x0100010000010000, | ||||||
|  |     0x0100010000010001, 0x0100010000010101, 0x0100010001ffff00, 0x0100010001ff00ff, | ||||||
|  |     0x010001000100ffff, 0x010001000100ff01, 0x0100010001000000, 0x0100010001000101, | ||||||
|  |     0x010001000101ff00, 0x0100010001010001, 0x01000101ffff0000, 0x01000101ff000000, | ||||||
|  |     0x01000101ff010000, 0x0100010100ff00ff, 0x0100010100ff0001, 0x0100010100ff0100, | ||||||
|  |     0x010001010000ffff, 0x0100010100000000, 0x01000101000001ff, 0x010001010001ff00, | ||||||
|  |     0x0100010101ff0000, 0x010001010100ff00, 0x01000101010000ff, 0x0100010101000000, | ||||||
|  |     0x0100010101000001, 0x0101ffffffffffff, 0x0101ffffffffff01, 0x0101ffffffff01ff, | ||||||
|  |     0x0101ffffffff0101, 0x0101ffffff000000, 0x0101ffffff01ffff, 0x0101ffffff01ff01, | ||||||
|  |     0x0101ffffff0101ff, 0x0101ffffff010101, 0x0101ffff00ff0000, 0x0101ffff0000ff00, | ||||||
|  |     0x0101ffff000000ff, 0x0101ffff00000001, 0x0101ffff00000100, 0x0101ffff01ffffff, | ||||||
|  |     0x0101ffff01ffff01, 0x0101ffff01ff01ff, 0x0101ffff01ff0101, 0x0101ffff01000000, | ||||||
|  |     0x0101ffff0101ffff, 0x0101ffff0101ff01, 0x0101ffff010101ff, 0x0101ffff01010101, | ||||||
|  |     0x0101ff00ffff0000, 0x0101ff00ffff0100, 0x0101ff00ff00ff00, 0x0101ff00ff0000ff, | ||||||
|  |     0x0101ff00ff000001, 0x0101ff00ff000100, 0x0101ff00ff000101, 0x0101ff0000ff0001, | ||||||
|  |     0x0101ff0000ff0100, 0x0101ff000000ff00, 0x0101ff0000000000, 0x0101ff00000001ff, | ||||||
|  |     0x0101ff0000000101, 0x0101ff000001ff00, 0x0101ff00000100ff, 0x0101ff0001ff0000, | ||||||
|  |     0x0101ff000100ffff, 0x0101ff000100ff01, 0x0101ff0001000001, 0x0101ff0001000100, | ||||||
|  |     0x0101ff01ffffff01, 0x0101ff01ffff01ff, 0x0101ff01ffff0101, 0x0101ff01ff00ffff, | ||||||
|  |     0x0101ff01ff000100, 0x0101ff01ff01ff01, 0x0101ff01ff0101ff, 0x0101ff01ff010101, | ||||||
|  |     0x0101ff0100ff0000, 0x0101ff010000ff00, 0x0101ff0100000001, 0x0101ff0100000100, | ||||||
|  |     0x0101ff0100010000, 0x0101ff0101ffffff, 0x0101ff0101ffff01, 0x0101ff0101ff01ff, | ||||||
|  |     0x0101ff0101ff0101, 0x0101ff0101000000, 0x0101ff010101ffff, 0x0101ff010101ff01, | ||||||
|  |     0x0101ff01010101ff, 0x0101ff0101010101, 0x010100ffff000100, 0x010100ffff010000, | ||||||
|  |     0x010100ff00ffff00, 0x010100ff00ff00ff, 0x010100ff0000ffff, 0x010100ff000000ff, | ||||||
|  |     0x010100ff00000000, 0x010100ff000001ff, 0x010100ff00000101, 0x010100ff0001ff00, | ||||||
|  |     0x010100ff00010000, 0x010100ff00010001, 0x010100ff000101ff, 0x010100ff00010100, | ||||||
|  |     0x010100ff01ff0000, 0x01010000ffff0001, 0x01010000ffff0100, 0x01010000ff00ffff, | ||||||
|  |     0x01010000ff00ff01, 0x01010000ff000000, 0x01010000ff0001ff, 0x01010000ff010001, | ||||||
|  |     0x01010000ff010100, 0x0101000000ffff01, 0x0101000000ff0000, 0x010100000000ff00, | ||||||
|  |     0x01010000000000ff, 0x0101000000000000, 0x0101000000000001, 0x0101000000000100, | ||||||
|  |     0x0101000000010000, 0x0101000000010101, 0x0101000001ffff00, 0x0101000001ff00ff, | ||||||
|  |     0x0101000001ff0000, 0x0101000001ff0001, 0x0101000001ff0100, 0x010100000100ff01, | ||||||
|  |     0x0101000001000000, 0x01010000010001ff, 0x01010001ffff0000, 0x01010001ff00ff00, | ||||||
|  |     0x01010001ff000001, 0x01010001ff000101, 0x01010001ff01ff00, 0x01010001ff010000, | ||||||
|  |     0x0101000100ff00ff, 0x0101000100ff0001, 0x0101000100ff0101, 0x010100010000ff01, | ||||||
|  |     0x0101000100000000, 0x0101000100000001, 0x01010001000001ff, 0x010100010001ffff, | ||||||
|  |     0x010100010001ff01, 0x0101000101ff0001, 0x010100010100ffff, 0x0101000101000000, | ||||||
|  |     0x0101000101000001, 0x0101000101000100, 0x010100010101ff00, 0x01010001010100ff, | ||||||
|  |     0x0101000101010001, 0x010101ffffffffff, 0x010101ffffffff01, 0x010101ffffff01ff, | ||||||
|  |     0x010101ffffff0101, 0x010101ffff01ffff, 0x010101ffff01ff01, 0x010101ffff0101ff, | ||||||
|  |     0x010101ffff010101, 0x010101ff0000ff00, 0x010101ff000000ff, 0x010101ff00000001, | ||||||
|  |     0x010101ff00000100, 0x010101ff01ffffff, 0x010101ff01ffff01, 0x010101ff01ff01ff, | ||||||
|  |     0x010101ff01ff0101, 0x010101ff01000000, 0x010101ff0101ffff, 0x010101ff0101ff01, | ||||||
|  |     0x010101ff010101ff, 0x010101ff01010101, 0x01010100ffff0000, 0x01010100ff0000ff, | ||||||
|  |     0x01010100ff000100, 0x01010100ff01ff00, 0x01010100ff010000, 0x0101010000ffff00, | ||||||
|  |     0x010101000000ffff, 0x0101010000000000, 0x0101010000000101, 0x010101000001ff00, | ||||||
|  |     0x0101010000010001, 0x0101010000010100, 0x010101000100ffff, 0x0101010001000001, | ||||||
|  |     0x01010101ffffffff, 0x01010101ffffff01, 0x01010101ffff01ff, 0x01010101ffff0101, | ||||||
|  |     0x01010101ff01ffff, 0x01010101ff01ff01, 0x01010101ff0101ff, 0x01010101ff010101, | ||||||
|  |     0x010101010000ff00, 0x01010101000000ff, 0x0101010100000001, 0x0101010101ffffff, | ||||||
|  |     0x0101010101ffff01, 0x0101010101ff01ff, 0x0101010101ff0101, 0x0101010101000000, | ||||||
|  |     0x010101010101ffff, 0x010101010101ff01, 0x01010101010101ff, 0x0101010101010101, | ||||||
| GGML_TABLE_END() | GGML_TABLE_END() | ||||||
|  | #else | ||||||
|  | GGML_TABLE_BEGIN(uint32_t, iq1s_grid_gpu, NGRID_IQ1S) | ||||||
|  |     0x00000000, 0x00000002, 0x00000101, 0x00000200, 0x00000202, 0x00010001, 0x00010101, 0x00020000, | ||||||
|  |     0x00020002, 0x00020200, 0x00020202, 0x01000101, 0x01010001, 0x01010100, 0x01010102, 0x01020101, | ||||||
|  |     0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02010101, 0x02020000, 0x02020002, 0x02020200, | ||||||
|  |     0x02020202, 0x00000110, 0x00000111, 0x00010011, 0x00010110, 0x00010112, 0x00010211, 0x00010212, | ||||||
|  |     0x00020111, 0x01000011, 0x01000112, 0x01000211, 0x01010012, 0x01010111, 0x01010212, 0x01020011, | ||||||
|  |     0x01020110, 0x01020112, 0x01020210, 0x02000111, 0x02010011, 0x02010110, 0x02010112, 0x02020111, | ||||||
|  |     0x00000020, 0x00000022, 0x00000220, 0x00000222, 0x00010121, 0x00020020, 0x00020022, 0x00020220, | ||||||
|  |     0x00020222, 0x01000121, 0x01010021, 0x01010221, 0x01020120, 0x01020221, 0x02000020, 0x02000022, | ||||||
|  |     0x02000220, 0x02000222, 0x02010021, 0x02010121, 0x02010221, 0x02020020, 0x02020022, 0x02020220, | ||||||
|  |     0x02020222, 0x00011001, 0x00011100, 0x00011102, 0x00021101, 0x01001001, 0x01001201, 0x01011101, | ||||||
|  |     0x01011202, 0x01021100, 0x01021101, 0x02011001, 0x02011201, 0x02021101, 0x00001011, 0x00001110, | ||||||
|  |     0x00001111, 0x00001112, 0x00011111, 0x00011210, 0x00011212, 0x00021211, 0x01001010, 0x01001111, | ||||||
|  |     0x01001212, 0x01011010, 0x01011011, 0x01011110, 0x01011111, 0x01011112, 0x01011211, 0x01021010, | ||||||
|  |     0x01021012, 0x01021111, 0x01021210, 0x01021212, 0x02001011, 0x02011011, 0x02011111, 0x02011210, | ||||||
|  |     0x02011212, 0x02021011, 0x02021110, 0x02021111, 0x02021112, 0x02021211, 0x00011120, 0x00011221, | ||||||
|  |     0x01001021, 0x01001120, 0x01011020, 0x01011022, 0x01011121, 0x01011220, 0x01021020, 0x01021021, | ||||||
|  |     0x01021122, 0x01021221, 0x02001121, 0x02011021, 0x02011120, 0x02011221, 0x00002000, 0x00002002, | ||||||
|  |     0x00002200, 0x00002202, 0x00012101, 0x00022000, 0x00022002, 0x00022200, 0x00022202, 0x01002101, | ||||||
|  |     0x01012001, 0x01012102, 0x01022101, 0x02002000, 0x02002002, 0x02002200, 0x02002202, 0x02012101, | ||||||
|  |     0x02022000, 0x02022002, 0x02022200, 0x02022202, 0x00002111, 0x00012011, 0x00012110, 0x00012211, | ||||||
|  |     0x00022110, 0x00022111, 0x01002011, 0x01012010, 0x01012011, 0x01012111, 0x01022011, 0x01022110, | ||||||
|  |     0x01022211, 0x02012011, 0x02012110, 0x02012112, 0x02012211, 0x02022111, 0x00002020, 0x00002022, | ||||||
|  |     0x00002220, 0x00002222, 0x00012121, 0x00022020, 0x00022022, 0x00022220, 0x00022222, 0x01002121, | ||||||
|  |     0x01012021, 0x01012221, 0x01022021, 0x01022121, 0x02002020, 0x02002022, 0x02002121, 0x02002220, | ||||||
|  |     0x02002222, 0x02012121, 0x02022020, 0x02022022, 0x02022220, 0x02022222, 0x00110000, 0x00110001, | ||||||
|  |     0x00110100, 0x00110201, 0x00120100, 0x00120101, 0x01100001, 0x01100100, 0x01110000, 0x01110101, | ||||||
|  |     0x01110200, 0x01120001, 0x01120100, 0x01120101, 0x01120201, 0x02110001, 0x02110100, 0x02110102, | ||||||
|  |     0x02120001, 0x02120101, 0x00100011, 0x00100110, 0x00100112, 0x00100211, 0x00110010, 0x00110012, | ||||||
|  |     0x00110111, 0x00110210, 0x00120011, 0x00120110, 0x00120211, 0x01100111, 0x01100212, 0x01110010, | ||||||
|  |     0x01110011, 0x01110012, 0x01110110, 0x01110111, 0x01110112, 0x01110211, 0x01120010, 0x01120111, | ||||||
|  |     0x02100110, 0x02110012, 0x02110111, 0x02120011, 0x02120110, 0x00110021, 0x00110120, 0x00110122, | ||||||
|  |     0x00120121, 0x01100020, 0x01100122, 0x01100221, 0x01110022, 0x01110121, 0x01110220, 0x01110222, | ||||||
|  |     0x01120120, 0x01120122, 0x02100121, 0x02110021, 0x02110120, 0x02110122, 0x02120121, 0x00101001, | ||||||
|  |     0x00101102, 0x00101201, 0x00111100, 0x00111101, 0x00111200, 0x00111201, 0x00121001, 0x00121102, | ||||||
|  |     0x01101001, 0x01101101, 0x01101102, 0x01101200, 0x01101202, 0x01111001, 0x01111100, 0x01111101, | ||||||
|  |     0x01111102, 0x01111201, 0x01121002, 0x01121101, 0x01121200, 0x02101100, 0x02101201, 0x02111000, | ||||||
|  |     0x02111100, 0x02111101, 0x02111200, 0x02111201, 0x02111202, 0x02121001, 0x02121100, 0x02121101, | ||||||
|  |     0x02121201, 0x00101012, 0x00101111, 0x00101212, 0x00111011, 0x00111110, 0x00111111, 0x00111112, | ||||||
|  |     0x00111211, 0x00121010, 0x00121012, 0x00121111, 0x00121210, 0x00121212, 0x01101011, 0x01101110, | ||||||
|  |     0x01101111, 0x01101112, 0x01111011, 0x01111012, 0x01111110, 0x01111111, 0x01111112, 0x01111211, | ||||||
|  |     0x01111212, 0x01121011, 0x01121110, 0x01121111, 0x01121112, 0x01121211, 0x02101010, 0x02101012, | ||||||
|  |     0x02101110, 0x02101111, 0x02101210, 0x02101212, 0x02111010, 0x02111011, 0x02111110, 0x02111111, | ||||||
|  |     0x02111112, 0x02111211, 0x02111212, 0x02121010, 0x02121012, 0x02121111, 0x00101021, 0x00101120, | ||||||
|  |     0x00101121, 0x00101122, 0x00111121, 0x00111122, 0x00111220, 0x00111222, 0x00121021, 0x00121122, | ||||||
|  |     0x01101020, 0x01101022, 0x01101120, 0x01101121, 0x01101220, 0x01101222, 0x01111021, 0x01111121, | ||||||
|  |     0x01111122, 0x01111220, 0x01111221, 0x01121021, 0x01121120, 0x01121121, 0x01121220, 0x01121221, | ||||||
|  |     0x01121222, 0x02101122, 0x02101222, 0x02111022, 0x02111121, 0x02121120, 0x02121221, 0x00112001, | ||||||
|  |     0x00112102, 0x00122101, 0x01102001, 0x01102100, 0x01102102, 0x01102201, 0x01112000, 0x01112101, | ||||||
|  |     0x01112200, 0x01112202, 0x01122000, 0x01122001, 0x01122100, 0x01122102, 0x01122201, 0x02102101, | ||||||
|  |     0x02112001, 0x02112100, 0x02122101, 0x00112010, 0x00112012, 0x00112111, 0x00112212, 0x00122011, | ||||||
|  |     0x00122111, 0x01102012, 0x01102110, 0x01102111, 0x01102210, 0x01112011, 0x01112110, 0x01112111, | ||||||
|  |     0x01112112, 0x01112211, 0x01112212, 0x01122010, 0x01122111, 0x01122212, 0x02102211, 0x02112011, | ||||||
|  |     0x02112012, 0x02112111, 0x02112210, 0x02122011, 0x02122112, 0x02122211, 0x00102221, 0x00112122, | ||||||
|  |     0x00122120, 0x00122122, 0x01102120, 0x01102122, 0x01102221, 0x01112020, 0x01112022, 0x01112121, | ||||||
|  |     0x01112220, 0x01122021, 0x01122122, 0x01122221, 0x02102121, 0x02112021, 0x02112122, 0x02112222, | ||||||
|  |     0x00200000, 0x00200002, 0x00200200, 0x00200202, 0x00210101, 0x00220000, 0x00220002, 0x00220101, | ||||||
|  |     0x00220200, 0x00220202, 0x01200101, 0x01210001, 0x01210201, 0x01220001, 0x01220101, 0x02200000, | ||||||
|  |     0x02200002, 0x02200200, 0x02200202, 0x02210101, 0x02220000, 0x02220002, 0x02220101, 0x02220200, | ||||||
|  |     0x02220202, 0x00200111, 0x00210011, 0x00210110, 0x00210211, 0x00220111, 0x01200012, 0x01200110, | ||||||
|  |     0x01200211, 0x01210111, 0x01210210, 0x01210212, 0x01220011, 0x01220110, 0x01220111, 0x01220112, | ||||||
|  |     0x02200111, 0x02210010, 0x02210112, 0x02210211, 0x02220111, 0x00200021, 0x00200220, 0x00200222, | ||||||
|  |     0x00210021, 0x00210121, 0x00220020, 0x00220022, 0x00220220, 0x00220222, 0x01200121, 0x01210021, | ||||||
|  |     0x01210122, 0x01210221, 0x01220121, 0x02200021, 0x02200220, 0x02200222, 0x02210021, 0x02210121, | ||||||
|  |     0x02220020, 0x02220022, 0x02220220, 0x02220222, 0x00201101, 0x00211100, 0x00211102, 0x00211201, | ||||||
|  |     0x00221101, 0x01201100, 0x01201101, 0x01201102, 0x01201201, 0x01211002, 0x01211101, 0x01211200, | ||||||
|  |     0x01211202, 0x01221102, 0x02201101, 0x02211001, 0x02211100, 0x02211201, 0x02221001, 0x02221101, | ||||||
|  |     0x00201211, 0x00211111, 0x00221011, 0x00221211, 0x01201010, 0x01201111, 0x01201210, 0x01211011, | ||||||
|  |     0x01211110, 0x01211111, 0x01211211, 0x01221012, 0x01221111, 0x01221210, 0x02201211, 0x02211010, | ||||||
|  |     0x02211110, 0x02211111, 0x02211210, 0x02211212, 0x02221011, 0x02221110, 0x02221112, 0x02221211, | ||||||
|  |     0x00201121, 0x00211020, 0x00211022, 0x00211221, 0x00221121, 0x01201021, 0x01201221, 0x01211121, | ||||||
|  |     0x01221020, 0x01221021, 0x01221221, 0x02201120, 0x02201122, 0x02211020, 0x02211222, 0x00202000, | ||||||
|  |     0x00202002, 0x00202200, 0x00202202, 0x00212101, 0x00222000, 0x00222002, 0x00222200, 0x00222202, | ||||||
|  |     0x01202101, 0x01212001, 0x01212100, 0x01222101, 0x02202000, 0x02202002, 0x02202200, 0x02202202, | ||||||
|  |     0x02222000, 0x02222002, 0x02222200, 0x02222202, 0x00202211, 0x00212011, 0x00212110, 0x00212211, | ||||||
|  |     0x00222111, 0x01202112, 0x01202211, 0x01212012, 0x01212111, 0x01222011, 0x01222110, 0x01222112, | ||||||
|  |     0x01222211, 0x02202111, 0x02212010, 0x02212112, 0x02212211, 0x02222110, 0x02222111, 0x00202020, | ||||||
|  |     0x00202022, 0x00202220, 0x00202222, 0x00222020, 0x00222022, 0x00222220, 0x00222222, 0x01202121, | ||||||
|  |     0x01212021, 0x01212122, 0x01212221, 0x01222121, 0x02202020, 0x02202022, 0x02202220, 0x02202222, | ||||||
|  |     0x02212121, 0x02222020, 0x02222022, 0x02222220, 0x02222222, 0x10000101, 0x10010001, 0x10010102, | ||||||
|  |     0x10020101, 0x11000201, 0x11010002, 0x11010101, 0x11010200, 0x11010202, 0x11020001, 0x11020100, | ||||||
|  |     0x11020102, 0x12010100, 0x12010201, 0x12020001, 0x12020102, 0x10000010, 0x10000011, 0x10000110, | ||||||
|  |     0x10000112, 0x10000211, 0x10010012, 0x10010111, 0x10010112, 0x10010210, 0x10010212, 0x10020011, | ||||||
|  |     0x10020112, 0x10020211, 0x11000111, 0x11000210, 0x11000212, 0x11010011, 0x11010110, 0x11010111, | ||||||
|  |     0x11010112, 0x11010211, 0x11010212, 0x11020111, 0x11020210, 0x11020212, 0x12000011, 0x12000110, | ||||||
|  |     0x12000112, 0x12010010, 0x12010012, 0x12010111, 0x12020010, 0x12020011, 0x12020012, 0x10000121, | ||||||
|  |     0x10010021, 0x10010120, 0x10010122, 0x10020121, 0x11000021, 0x11010022, 0x11010121, 0x11010222, | ||||||
|  |     0x11020120, 0x11020221, 0x12000221, 0x12010120, 0x12020121, 0x10001001, 0x10011101, 0x10011201, | ||||||
|  |     0x10021201, 0x11001101, 0x11001200, 0x11001202, 0x11011001, 0x11011100, 0x11011101, 0x11011102, | ||||||
|  |     0x11021001, 0x11021002, 0x11021101, 0x11021200, 0x11021202, 0x12001001, 0x12001102, 0x12001201, | ||||||
|  |     0x12011000, 0x12011002, 0x12011101, 0x12021000, 0x12021001, 0x12021201, 0x10001011, 0x10001012, | ||||||
|  |     0x10001111, 0x10001212, 0x10011011, 0x10011110, 0x10011111, 0x10011112, 0x10011211, 0x10021010, | ||||||
|  |     0x10021111, 0x10021212, 0x11001011, 0x11001110, 0x11001111, 0x11001112, 0x11001211, 0x11011010, | ||||||
|  |     0x11011011, 0x11011110, 0x11011111, 0x11011112, 0x11011210, 0x11011211, 0x11021011, 0x11021110, | ||||||
|  |     0x11021111, 0x11021112, 0x11021211, 0x12001012, 0x12001110, 0x12001111, 0x12001210, 0x12011011, | ||||||
|  |     0x12011110, 0x12011111, 0x12011112, 0x12011211, 0x12011212, 0x12021111, 0x12021210, 0x12021212, | ||||||
|  |     0x10001021, 0x10001121, 0x10001221, 0x10011120, 0x10011121, 0x10011220, 0x10011222, 0x10021021, | ||||||
|  |     0x10021120, 0x10021221, 0x11001020, 0x11001022, 0x11001121, 0x11001220, 0x11011020, 0x11011021, | ||||||
|  |     0x11011022, 0x11011121, 0x11011122, 0x11011221, 0x11021022, 0x11021121, 0x11021220, 0x12001021, | ||||||
|  |     0x12001121, 0x12001222, 0x12011120, 0x12011121, 0x12021021, 0x12021120, 0x12021122, 0x10002101, | ||||||
|  |     0x10012001, 0x10012101, 0x10012202, 0x10022101, 0x11002002, 0x11002201, 0x11012000, 0x11012101, | ||||||
|  |     0x11012200, 0x11022001, 0x11022100, 0x11022102, 0x11022201, 0x12002101, 0x12012001, 0x12012100, | ||||||
|  |     0x12012102, 0x12012201, 0x12022101, 0x10002011, 0x10002111, 0x10002112, 0x10002212, 0x10012010, | ||||||
|  |     0x10012110, 0x10012111, 0x10012210, 0x10022011, 0x10022110, 0x10022112, 0x11002010, 0x11002111, | ||||||
|  |     0x11002212, 0x11012011, 0x11012012, 0x11012110, 0x11012111, 0x11012112, 0x11012211, 0x11022010, | ||||||
|  |     0x11022012, 0x11022111, 0x11022112, 0x11022212, 0x12002112, 0x12002211, 0x12012012, 0x12012111, | ||||||
|  |     0x12012112, 0x12012210, 0x12022011, 0x12022110, 0x12022112, 0x12022211, 0x10012122, 0x11002120, | ||||||
|  |     0x11002122, 0x11002221, 0x11012121, 0x11012220, 0x11012222, 0x11022120, 0x11022221, 0x12012120, | ||||||
|  |     0x12022121, 0x10100001, 0x10100100, 0x10100101, 0x10100102, 0x10100201, 0x10110002, 0x10110101, | ||||||
|  |     0x10110202, 0x10120001, 0x10120100, 0x10120201, 0x11100000, 0x11100101, 0x11100200, 0x11110001, | ||||||
|  |     0x11110100, 0x11110101, 0x11110102, 0x11110201, 0x11120101, 0x11120200, 0x12100102, 0x12100201, | ||||||
|  |     0x12110101, 0x12110200, 0x12120000, 0x12120001, 0x12120102, 0x12120201, 0x10100111, 0x10100210, | ||||||
|  |     0x10100211, 0x10100212, 0x10110011, 0x10110110, 0x10110111, 0x10110112, 0x10110210, 0x10110211, | ||||||
|  |     0x10120010, 0x10120111, 0x10120112, 0x10120210, 0x10120212, 0x11100011, 0x11100110, 0x11100111, | ||||||
|  |     0x11100112, 0x11100211, 0x11110010, 0x11110011, 0x11110012, 0x11110110, 0x11110111, 0x11110112, | ||||||
|  |     0x11110210, 0x11110211, 0x11110212, 0x11120011, 0x11120110, 0x11120111, 0x11120112, 0x11120211, | ||||||
|  |     0x12100012, 0x12100111, 0x12110011, 0x12110110, 0x12110111, 0x12110112, 0x12110211, 0x12120010, | ||||||
|  |     0x12120111, 0x12120212, 0x10100021, 0x10100122, 0x10110022, 0x10110121, 0x10110222, 0x10120021, | ||||||
|  |     0x10120120, 0x11100022, 0x11100121, 0x11100222, 0x11110021, 0x11110120, 0x11110121, 0x11110122, | ||||||
|  |     0x11110221, 0x11120022, 0x11120121, 0x12100121, 0x12110020, 0x12110022, 0x12110121, 0x12110221, | ||||||
|  |     0x12110222, 0x12120120, 0x10101100, 0x10101101, 0x10111001, 0x10111100, 0x10111101, 0x10111102, | ||||||
|  |     0x10111200, 0x10111201, 0x10121001, 0x10121101, 0x10121200, 0x10121202, 0x11101001, 0x11101100, | ||||||
|  |     0x11101101, 0x11101102, 0x11101201, 0x11101202, 0x11111000, 0x11111001, 0x11111100, 0x11111101, | ||||||
|  |     0x11111102, 0x11111200, 0x11111201, 0x11111202, 0x11121001, 0x11121002, 0x11121100, 0x11121101, | ||||||
|  |     0x11121102, 0x11121201, 0x12101000, 0x12101200, 0x12101202, 0x12111001, 0x12111100, 0x12111101, | ||||||
|  |     0x12111102, 0x12111201, 0x12121001, 0x12121100, 0x12121101, 0x12121202, 0x10101011, 0x10101012, | ||||||
|  |     0x10101110, 0x10101111, 0x10101112, 0x10101211, 0x10111010, 0x10111011, 0x10111012, 0x10111110, | ||||||
|  |     0x10111111, 0x10111112, 0x10111211, 0x10111212, 0x10121011, 0x10121110, 0x10121111, 0x10121112, | ||||||
|  |     0x10121211, 0x11101010, 0x11101011, 0x11101012, 0x11101110, 0x11101111, 0x11101112, 0x11101210, | ||||||
|  |     0x11101211, 0x11111010, 0x11111011, 0x11111012, 0x11111110, 0x11111111, 0x11111112, 0x11111210, | ||||||
|  |     0x11111211, 0x11111212, 0x11121010, 0x11121011, 0x11121110, 0x11121111, 0x11121112, 0x11121210, | ||||||
|  |     0x11121211, 0x11121212, 0x12101011, 0x12101110, 0x12101111, 0x12101211, 0x12101212, 0x12111010, | ||||||
|  |     0x12111011, 0x12111110, 0x12111111, 0x12111112, 0x12111210, 0x12111211, 0x12121011, 0x12121110, | ||||||
|  |     0x12121111, 0x12121112, 0x12121211, 0x10101020, 0x10101021, 0x10101022, 0x10101120, 0x10101122, | ||||||
|  |     0x10101220, 0x10101221, 0x10111021, 0x10111120, 0x10111121, 0x10111220, 0x10111221, 0x10121020, | ||||||
|  |     0x10121021, 0x10121022, 0x10121120, 0x10121121, 0x10121122, 0x10121220, 0x10121221, 0x11101021, | ||||||
|  |     0x11101121, 0x11101122, 0x11101220, 0x11101221, 0x11101222, 0x11111020, 0x11111021, 0x11111022, | ||||||
|  |     0x11111120, 0x11111121, 0x11111122, 0x11111220, 0x11111221, 0x11111222, 0x11121021, 0x11121120, | ||||||
|  |     0x11121121, 0x11121221, 0x12101022, 0x12101121, 0x12101122, 0x12101220, 0x12101221, 0x12101222, | ||||||
|  |     0x12111021, 0x12111121, 0x12111222, 0x12121022, 0x12121121, 0x12121122, 0x12121220, 0x12121221, | ||||||
|  |     0x10102100, 0x10102101, 0x10102102, 0x10102201, 0x10112000, 0x10112101, 0x10112200, 0x10122001, | ||||||
|  |     0x10122202, 0x11102101, 0x11102200, 0x11102202, 0x11112001, 0x11112100, 0x11112101, 0x11112102, | ||||||
|  |     0x11112200, 0x11112201, 0x11122000, 0x11122002, 0x11122100, 0x11122101, 0x12102002, 0x12102201, | ||||||
|  |     0x12112000, 0x12112002, 0x12112101, 0x12112200, 0x12122001, 0x12122201, 0x10102011, 0x10102012, | ||||||
|  |     0x10102111, 0x10102212, 0x10112011, 0x10112110, 0x10112111, 0x10112112, 0x10112211, 0x10122111, | ||||||
|  |     0x11102011, 0x11102110, 0x11102111, 0x11102112, 0x11102211, 0x11112010, 0x11112011, 0x11112012, | ||||||
|  |     0x11112110, 0x11112111, 0x11112112, 0x11112210, 0x11112211, 0x11112212, 0x11122011, 0x11122110, | ||||||
|  |     0x11122111, 0x11122112, 0x11122211, 0x12102011, 0x12102111, 0x12102211, 0x12112011, 0x12112110, | ||||||
|  |     0x12112111, 0x12112112, 0x12112210, 0x12112211, 0x12122111, 0x10102120, 0x10102220, 0x10112121, | ||||||
|  |     0x10112222, 0x10122020, 0x10122121, 0x10122122, 0x10122221, 0x11102121, 0x11102220, 0x11102221, | ||||||
|  |     0x11112021, 0x11112121, 0x11112122, 0x11112220, 0x11112221, 0x11122022, 0x11122121, 0x11122220, | ||||||
|  |     0x11122222, 0x12102021, 0x12102222, 0x12112022, 0x12112121, 0x12112122, 0x12112220, 0x12112222, | ||||||
|  |     0x12122021, 0x10200101, 0x10210100, 0x10210102, 0x10210201, 0x10220101, 0x11200100, 0x11210000, | ||||||
|  |     0x11210101, 0x11210102, 0x11210200, 0x11210202, 0x11220001, 0x11220100, 0x11220102, 0x11220201, | ||||||
|  |     0x12200001, 0x12210102, 0x12220101, 0x10200011, 0x10200110, 0x10200112, 0x10200211, 0x10210012, | ||||||
|  |     0x10210111, 0x10220011, 0x10220012, 0x10220112, 0x10220211, 0x11200111, 0x11200211, 0x11210011, | ||||||
|  |     0x11210111, 0x11210112, 0x11210211, 0x11220111, 0x11220112, 0x11220212, 0x12200110, 0x12200212, | ||||||
|  |     0x12210012, 0x12210111, 0x12220011, 0x12220112, 0x12220211, 0x10210021, 0x10210122, 0x10210221, | ||||||
|  |     0x11200020, 0x11200021, 0x11200122, 0x11210121, 0x11210122, 0x11210220, 0x11220020, 0x12200121, | ||||||
|  |     0x12210021, 0x12210122, 0x12220121, 0x10211001, 0x10211002, 0x10211101, 0x10211102, 0x10211202, | ||||||
|  |     0x10221001, 0x10221102, 0x10221201, 0x11201000, 0x11201002, 0x11201101, 0x11201200, 0x11201202, | ||||||
|  |     0x11211001, 0x11211100, 0x11211101, 0x11211102, 0x11211201, 0x11211202, 0x11221000, 0x11221002, | ||||||
|  |     0x11221101, 0x12201100, 0x12201101, 0x12201201, 0x12211000, 0x12211002, 0x12211100, 0x12211101, | ||||||
|  |     0x12211102, 0x12211200, 0x12211202, 0x12221001, 0x12221100, 0x12221201, 0x10201111, 0x10201210, | ||||||
|  |     0x10201212, 0x10211011, 0x10211111, 0x10211112, 0x10211211, 0x11201110, 0x11201111, 0x11201112, | ||||||
|  |     0x11201211, 0x11211010, 0x11211011, 0x11211110, 0x11211111, 0x11211112, 0x11211211, 0x11221011, | ||||||
|  |     0x11221110, 0x11221111, 0x11221112, 0x11221211, 0x12201112, 0x12201211, 0x12201212, 0x12211011, | ||||||
|  |     0x12211111, 0x12211112, 0x12211211, 0x12211212, 0x12221012, 0x12221111, 0x12221112, 0x12221210, | ||||||
|  |     0x10201022, 0x10201221, 0x10211121, 0x10221020, 0x10221122, 0x10221220, 0x10221221, 0x11201020, | ||||||
|  |     0x11201121, 0x11201220, 0x11201222, 0x11211021, 0x11211120, 0x11211121, 0x11211122, 0x11211220, | ||||||
|  |     0x11211222, 0x11221020, 0x11221121, 0x11221220, 0x12201020, 0x12201022, 0x12201121, 0x12201222, | ||||||
|  |     0x12211120, 0x12211122, 0x12211220, 0x12211221, 0x12221020, 0x12221120, 0x12221122, 0x12221222, | ||||||
|  |     0x10212102, 0x10212201, 0x10222101, 0x11202001, 0x11212002, 0x11212101, 0x11212202, 0x11222001, | ||||||
|  |     0x11222201, 0x12202101, 0x12212001, 0x12212200, 0x12222102, 0x10202011, 0x10202110, 0x10212010, | ||||||
|  |     0x10212111, 0x10222011, 0x10222110, 0x10222112, 0x10222211, 0x11202010, 0x11202011, 0x11202111, | ||||||
|  |     0x11202112, 0x11202210, 0x11212011, 0x11212110, 0x11212111, 0x11212112, 0x11212211, 0x11222010, | ||||||
|  |     0x11222111, 0x11222212, 0x12202012, 0x12202110, 0x12202212, 0x12212111, 0x12222011, 0x12222110, | ||||||
|  |     0x12222111, 0x12222211, 0x10212021, 0x10212122, 0x10212220, 0x11202021, 0x11202120, 0x11202221, | ||||||
|  |     0x11212020, 0x11212121, 0x11212220, 0x11212222, 0x11222120, 0x11222121, 0x11222221, 0x12202122, | ||||||
|  |     0x12212120, 0x12212220, 0x12212222, 0x12222122, 0x20000000, 0x20000002, 0x20000200, 0x20000202, | ||||||
|  |     0x20020000, 0x20020002, 0x20020200, 0x20020202, 0x21000101, 0x21010000, 0x21010001, 0x21010100, | ||||||
|  |     0x21010102, 0x21010201, 0x21020101, 0x22000000, 0x22000002, 0x22000200, 0x22000202, 0x22010101, | ||||||
|  |     0x22020000, 0x22020002, 0x22020200, 0x22020202, 0x20000111, 0x20010011, 0x20010110, 0x20010112, | ||||||
|  |     0x20010211, 0x20020111, 0x21000011, 0x21000110, 0x21000211, 0x21010010, 0x21010012, 0x21010111, | ||||||
|  |     0x21010112, 0x21010210, 0x21010211, 0x21020110, 0x21020112, 0x21020211, 0x22000111, 0x22000211, | ||||||
|  |     0x22010110, 0x22010112, 0x22010211, 0x22020111, 0x20000020, 0x20000022, 0x20000220, 0x20000222, | ||||||
|  |     0x20010121, 0x20020020, 0x20020022, 0x20020220, 0x20020222, 0x21010021, 0x21010120, 0x21010221, | ||||||
|  |     0x21020121, 0x22000020, 0x22000022, 0x22000220, 0x22000222, 0x22010121, 0x22020020, 0x22020022, | ||||||
|  |     0x22020220, 0x22020222, 0x20011100, 0x20011201, 0x21001001, 0x21001100, 0x21011001, 0x21011101, | ||||||
|  |     0x21011202, 0x21021001, 0x21021100, 0x21021201, 0x22011100, 0x22011201, 0x20001011, 0x20001211, | ||||||
|  |     0x20011012, 0x20011111, 0x20011212, 0x20021112, 0x20021211, 0x21001010, 0x21001011, 0x21001111, | ||||||
|  |     0x21001210, 0x21011011, 0x21011110, 0x21011111, 0x21011112, 0x21011211, 0x21011212, 0x21021111, | ||||||
|  |     0x21021112, 0x21021210, 0x21021212, 0x22001011, 0x22001110, 0x22001112, 0x22001211, 0x22011010, | ||||||
|  |     0x22011012, 0x22011111, 0x22011210, 0x22021112, 0x20011021, 0x20011122, 0x20011221, 0x20021121, | ||||||
|  |     0x21001021, 0x21001120, 0x21001221, 0x21001222, 0x21011020, 0x21011121, 0x21011221, 0x21011222, | ||||||
|  |     0x21021021, 0x21021122, 0x21021222, 0x22001121, 0x22011021, 0x22011222, 0x22021120, 0x20002000, | ||||||
|  |     0x20002002, 0x20002200, 0x20002202, 0x20012101, 0x20022000, 0x20022002, 0x20022200, 0x20022202, | ||||||
|  |     0x21002001, 0x21002101, 0x21012001, 0x21012100, 0x21012201, 0x21022101, 0x21022201, 0x22002000, | ||||||
|  |     0x22002002, 0x22002200, 0x22002202, 0x22012101, 0x22022000, 0x22022002, 0x22022200, 0x22022202, | ||||||
|  |     0x20002111, 0x20002112, 0x20012011, 0x20012110, 0x20012112, 0x20022111, 0x21002011, 0x21002110, | ||||||
|  |     0x21002112, 0x21002211, 0x21012010, 0x21012012, 0x21012111, 0x21012212, 0x21022011, 0x21022110, | ||||||
|  |     0x22002111, 0x22012112, 0x22012211, 0x22022111, 0x20002020, 0x20002022, 0x20002220, 0x20002222, | ||||||
|  |     0x20012121, 0x20022020, 0x20022022, 0x20022220, 0x20022222, 0x21002121, 0x21012021, 0x21012120, | ||||||
|  |     0x21012122, 0x22002020, 0x22002022, 0x22002220, 0x22002222, 0x22012121, 0x22022020, 0x22022022, | ||||||
|  |     0x22022220, 0x22022222, 0x20100101, 0x20110001, 0x20110102, 0x20110200, 0x20110201, 0x20120101, | ||||||
|  |     0x21100001, 0x21100102, 0x21100201, 0x21110101, 0x21110200, 0x21110202, 0x21120201, 0x21120202, | ||||||
|  |     0x22100101, 0x22110001, 0x22110100, 0x22110102, 0x22110201, 0x22120101, 0x20100011, 0x20100110, | ||||||
|  |     0x20100112, 0x20100211, 0x20110010, 0x20110111, 0x20110210, 0x20110212, 0x20120011, 0x20120110, | ||||||
|  |     0x20120112, 0x20120211, 0x21100010, 0x21100111, 0x21110010, 0x21110011, 0x21110110, 0x21110111, | ||||||
|  |     0x21110112, 0x21110211, 0x21120012, 0x21120111, 0x22100110, 0x22100112, 0x22110012, 0x22110111, | ||||||
|  |     0x22110210, 0x22120011, 0x22120110, 0x22120112, 0x22120211, 0x20100121, 0x20110021, 0x20110120, | ||||||
|  |     0x20110221, 0x20120121, 0x21100120, 0x21100122, 0x21100221, 0x21110020, 0x21110022, 0x21110121, | ||||||
|  |     0x21110220, 0x21120122, 0x21120221, 0x22100121, 0x22110120, 0x22110122, 0x22120221, 0x20101001, | ||||||
|  |     0x20101100, 0x20101102, 0x20111000, 0x20111101, 0x20111200, 0x20121102, 0x21101000, 0x21101202, | ||||||
|  |     0x21111001, 0x21111100, 0x21111101, 0x21111102, 0x21111200, 0x21111201, 0x21121000, 0x21121001, | ||||||
|  |     0x21121002, 0x21121101, 0x22101100, 0x22101102, 0x22111002, 0x22111100, 0x22111101, 0x22111200, | ||||||
|  |     0x22121001, 0x22121201, 0x20101010, 0x20101111, 0x20101210, 0x20101212, 0x20111010, 0x20111011, | ||||||
|  |     0x20111110, 0x20111111, 0x20111112, 0x20111211, 0x20121011, 0x20121111, 0x20121211, 0x20121212, | ||||||
|  |     0x21101011, 0x21101110, 0x21101111, 0x21101112, 0x21101211, 0x21111010, 0x21111011, 0x21111012, | ||||||
|  |     0x21111110, 0x21111111, 0x21111112, 0x21111210, 0x21111211, 0x21111212, 0x21121011, 0x21121110, | ||||||
|  |     0x21121111, 0x21121112, 0x21121211, 0x22101011, 0x22101111, 0x22101210, 0x22111011, 0x22111012, | ||||||
|  |     0x22111110, 0x22111111, 0x22111112, 0x22111211, 0x22111212, 0x22121010, 0x22121012, 0x22121111, | ||||||
|  |     0x22121210, 0x22121212, 0x20101021, 0x20101120, 0x20111020, 0x20111121, 0x20111221, 0x20121020, | ||||||
|  |     0x20121122, 0x20121221, 0x21101121, 0x21101220, 0x21101221, 0x21111021, 0x21111022, 0x21111121, | ||||||
|  |     0x21111122, 0x21111221, 0x21121121, 0x21121220, 0x22101022, 0x22101120, 0x22101221, 0x22101222, | ||||||
|  |     0x22111022, 0x22111120, 0x22111121, 0x22121120, 0x22121122, 0x22121221, 0x20102101, 0x20112102, | ||||||
|  |     0x20112201, 0x20122101, 0x21102001, 0x21102102, 0x21112000, 0x21112002, 0x21112101, 0x21112102, | ||||||
|  |     0x21112202, 0x21122100, 0x21122101, 0x22102101, 0x22112001, 0x22112102, 0x22112201, 0x22122101, | ||||||
|  |     0x20102110, 0x20102112, 0x20102211, 0x20112010, 0x20112012, 0x20112111, 0x20112210, 0x20112212, | ||||||
|  |     0x20122010, 0x20122011, 0x20122110, 0x20122112, 0x21102010, 0x21102012, 0x21102111, 0x21102210, | ||||||
|  |     0x21102212, 0x21112011, 0x21112110, 0x21112111, 0x21112112, 0x21112211, 0x21122012, 0x21122111, | ||||||
|  |     0x21122112, 0x21122212, 0x22102011, 0x22102110, 0x22112010, 0x22112012, 0x22112111, 0x22112212, | ||||||
|  |     0x22122011, 0x22122112, 0x20102121, 0x20112121, 0x20122121, 0x21102120, 0x21102122, 0x21102221, | ||||||
|  |     0x21112020, 0x21112121, 0x21112220, 0x21122021, 0x22102121, 0x22112021, 0x22112120, 0x22112121, | ||||||
|  |     0x22112122, 0x20200000, 0x20200002, 0x20200200, 0x20200202, 0x20210101, 0x20220000, 0x20220002, | ||||||
|  |     0x20220200, 0x20220202, 0x21200101, 0x21210001, 0x21210100, 0x21210102, 0x21210201, 0x22200000, | ||||||
|  |     0x22200002, 0x22200200, 0x22200202, 0x22210101, 0x22220000, 0x22220002, 0x22220200, 0x22220202, | ||||||
|  |     0x20200111, 0x20200211, 0x20210011, 0x20210110, 0x20210112, 0x20210211, 0x20210212, 0x21200112, | ||||||
|  |     0x21200211, 0x21210011, 0x21210111, 0x21210210, 0x21210212, 0x21220011, 0x21220110, 0x22200111, | ||||||
|  |     0x22210010, 0x22210012, 0x22210112, 0x22210211, 0x20200022, 0x20200220, 0x20200222, 0x20210020, | ||||||
|  |     0x20210221, 0x20220022, 0x20220220, 0x20220222, 0x21200121, 0x21210021, 0x21210122, 0x21210221, | ||||||
|  |     0x21220121, 0x22200020, 0x22200022, 0x22200220, 0x22200222, 0x22210121, 0x22220020, 0x22220022, | ||||||
|  |     0x22220220, 0x22220222, 0x20211201, 0x20221101, 0x21201001, 0x21201100, 0x21211000, 0x21211100, | ||||||
|  |     0x21211101, 0x21211200, 0x21211202, 0x21221001, 0x21221101, 0x21221102, 0x21221200, 0x21221201, | ||||||
|  |     0x22201101, 0x20201112, 0x20201211, 0x20211010, 0x20211012, 0x20211111, 0x20211210, 0x20221112, | ||||||
|  |     0x20221211, 0x21201012, 0x21201111, 0x21211011, 0x21211110, 0x21211111, 0x21211112, 0x21211211, | ||||||
|  |     0x21221111, 0x21221212, 0x22201011, 0x22201110, 0x22201111, 0x22201112, 0x22201211, 0x22211012, | ||||||
|  |     0x22211111, 0x22211210, 0x20201121, 0x20211021, 0x20211122, 0x20211222, 0x20221021, 0x20221121, | ||||||
|  |     0x21201120, 0x21201122, 0x21201222, 0x21211022, 0x21211121, 0x21211122, 0x21211220, 0x21221020, | ||||||
|  |     0x21221022, 0x22201122, 0x22211020, 0x22211121, 0x22211122, 0x22211221, 0x22221021, 0x22221120, | ||||||
|  |     0x22221122, 0x20202000, 0x20202002, 0x20202200, 0x20202202, 0x20222000, 0x20222002, 0x20222200, | ||||||
|  |     0x20222202, 0x21212001, 0x21212100, 0x21212102, 0x21212201, 0x22202000, 0x22202002, 0x22202200, | ||||||
|  |     0x22202202, 0x22212101, 0x22222000, 0x22222002, 0x22222200, 0x22222202, 0x20202111, 0x20212110, | ||||||
|  |     0x20212211, 0x20222011, 0x20222111, 0x21202011, 0x21212010, 0x21212111, 0x21212212, 0x21222011, | ||||||
|  |     0x21222112, 0x21222211, 0x22212010, 0x22212112, 0x20202020, 0x20202022, 0x20202220, 0x20202222, | ||||||
|  |     0x20222020, 0x20222022, 0x20222220, 0x20222222, 0x21212021, 0x21212120, 0x21212122, 0x22202020, | ||||||
|  |     0x22202022, 0x22202220, 0x22202222, 0x22212121, 0x22222020, 0x22222022, 0x22222220, 0x22222222, | ||||||
|  | GGML_TABLE_END() | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #endif // GGML_COMMON_IMPL | #endif // GGML_COMMON_IMPL | ||||||
|   | |||||||
							
								
								
									
										60
									
								
								ggml-cuda.cu
									
									
									
									
									
								
							
							
						
						
									
										60
									
								
								ggml-cuda.cu
									
									
									
									
									
								
							| @@ -566,7 +566,7 @@ static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N | |||||||
| typedef struct { | typedef struct { | ||||||
|     half d; |     half d; | ||||||
|     uint8_t  qs[QK_K/8]; |     uint8_t  qs[QK_K/8]; | ||||||
|     uint8_t scales[QK_K/16]; |     uint16_t qh[QK_K/32]; | ||||||
| } block_iq1_s; | } block_iq1_s; | ||||||
| static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); | static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); | ||||||
|  |  | ||||||
| @@ -1722,11 +1722,22 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_ | |||||||
|     const int il = tid/8; // 0...3 |     const int il = tid/8; // 0...3 | ||||||
|     const int ib = tid%8; // 0...7 |     const int ib = tid%8; // 0...7 | ||||||
|     dst_t * y = yy + i*QK_K + 32*ib + 8*il; |     dst_t * y = yy + i*QK_K + 32*ib + 8*il; | ||||||
|     const int i8 = 4*ib+il; |     const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1); | ||||||
|     uint8_t h = x[i].scales[i8/2] >> 4*(i8%2); | #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics | ||||||
|     const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[i8] | ((h & 8) << 5))); |     int grid32[2]; const int8_t * q = (const int8_t *)grid32; | ||||||
|     const float d = (float)x[i].d * (2*(h & 7) + 1); |     grid32[0] = *((const int *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)))); | ||||||
|     for (int j = 0; j < 8; ++j) y[j] = d * grid[j]; |     grid32[1] = __vsub4((grid32[0] >>  4) & 0x0f0f0f0f, 0x01010101); | ||||||
|  |     grid32[0] = __vsub4(grid32[0] & 0x0f0f0f0f, 0x01010101); | ||||||
|  |     for (int j = 0; j < 8; ++j) { | ||||||
|  |         y[j] = d * q[j]; | ||||||
|  |     } | ||||||
|  | #else | ||||||
|  |     const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8))); | ||||||
|  |     for (int j = 0; j < 4; ++j) { | ||||||
|  |         y[j+0] = d * ((grid[j] & 0xf) - 1); | ||||||
|  |         y[j+4] = d * ((grid[j] >>  4) - 1); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
| #else | #else | ||||||
|     assert(false); |     assert(false); | ||||||
| #endif | #endif | ||||||
| @@ -4538,44 +4549,33 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1( | |||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| static __device__ __forceinline__ float vec_dot_iq1_s_q8_1( | static __device__ __forceinline__ float vec_dot_iq1_s_q8_1( | ||||||
|     const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) { |     const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) { | ||||||
| #if QK_K == 256 | #if QK_K == 256 | ||||||
|     const block_iq1_s * bq1 = (const block_iq1_s *) vbq; |     const block_iq1_s * bq1 = (const block_iq1_s *) vbq; | ||||||
|  |  | ||||||
|     const int ib32 = iqs; |     const int ib32 = iqs; | ||||||
|     int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0; |     int sumi = 0; | ||||||
|     const uint8_t h1 = bq1->scales[2*ib32+0]; |  | ||||||
|     const uint8_t h2 = bq1->scales[2*ib32+1]; |  | ||||||
| #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics | #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics | ||||||
|     const int * q8 = (const int *)bq8_1[ib32].qs; |     const int * q8 = (const int *)bq8_1[ib32].qs; | ||||||
|     const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); |     for (int l = 0; l < 4; ++l) { | ||||||
|     const int * grid2 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); |         const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8))); | ||||||
|     const int * grid3 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); |         int grid0 = __vsub4(grid[0] & 0x0f0f0f0f, 0x01010101); | ||||||
|     const int * grid4 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); |         int grid1 = __vsub4((grid[0] >> 4) & 0x0f0f0f0f, 0x01010101); | ||||||
|     for (int j = 0; j < 2; ++j) { |         sumi = __dp4a(q8[2*l+1], grid1, __dp4a(q8[2*l+0], grid0, sumi)); | ||||||
|         sumi1 = __dp4a(q8[j+0], grid1[j], sumi1); |  | ||||||
|         sumi2 = __dp4a(q8[j+2], grid2[j], sumi2); |  | ||||||
|         sumi3 = __dp4a(q8[j+4], grid3[j], sumi3); |  | ||||||
|         sumi4 = __dp4a(q8[j+6], grid4[j], sumi4); |  | ||||||
|     } |     } | ||||||
| #else | #else | ||||||
|     const int8_t   * q8 = bq8_1[ib32].qs; |     const int8_t   * q8 = bq8_1[ib32].qs; | ||||||
|     const int8_t * grid1 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); |     for (int l = 0; l < 4; ++l) { | ||||||
|     const int8_t * grid2 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); |         const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8))); | ||||||
|     const int8_t * grid3 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); |         for (int j = 0; j < 4; ++j) { | ||||||
|     const int8_t * grid4 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); |             sumi += q8[j] * ((grid[j] & 0xf) - 1) + q8[j+4] * ((grid[j] >>  4) - 1); | ||||||
|     for (int j = 0; j < 8; ++j) { |         } | ||||||
|         sumi1 += q8[j+ 0] * grid1[j]; |         q8 += 8; | ||||||
|         sumi2 += q8[j+ 8] * grid2[j]; |  | ||||||
|         sumi3 += q8[j+16] * grid3[j]; |  | ||||||
|         sumi4 += q8[j+24] * grid4[j]; |  | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|     const float d = (float)bq1->d * __low2float(bq8_1[ib32].ds); |     const float d = (float)bq1->d * __low2float(bq8_1[ib32].ds); | ||||||
|     return d * (sumi1 * (2*(h1 & 7) + 1) + sumi2 * (2*((h1 >> 4) & 7) + 1) + |     return d * sumi * (2*(bq1->qh[ib32] >> 12) + 1); | ||||||
|                 sumi3 * (2*(h2 & 7) + 1) + sumi4 * (2*((h2 >> 4) & 7) + 1)); |  | ||||||
| #else | #else | ||||||
|     assert(false); |     assert(false); | ||||||
|     return 0.f; |     return 0.f; | ||||||
|   | |||||||
| @@ -2596,7 +2596,7 @@ typedef struct { | |||||||
| typedef struct { | typedef struct { | ||||||
|     half d; |     half d; | ||||||
|     uint8_t  qs[QK_K/8]; |     uint8_t  qs[QK_K/8]; | ||||||
|     uint8_t scales[QK_K/16]; |     uint16_t qh[QK_K/32]; | ||||||
| } block_iq1_s; | } block_iq1_s; | ||||||
|  |  | ||||||
| // Non-linear quants | // Non-linear quants | ||||||
| @@ -4338,48 +4338,53 @@ void kernel_mul_mv_iq1_s_f32_impl( | |||||||
|     device const block_iq1_s * x = (device const block_iq1_s *) src0 + ib_row + offset0; |     device const block_iq1_s * x = (device const block_iq1_s *) src0 + ib_row + offset0; | ||||||
|     device const float       * y = (device const float       *) src1 + r1*ne10 + im*ne00*ne1; |     device const float       * y = (device const float       *) src1 + r1*ne10 + im*ne00*ne1; | ||||||
|  |  | ||||||
|     float yl[16]; |     float yl[32]; | ||||||
|     float sumf[N_DST]={0.f}, all_sum; |     float sumf[N_DST]={0.f}, all_sum; | ||||||
|  |  | ||||||
|     const int nb32 = nb * (QK_K / 32); |     const int nb32 = nb * (QK_K / 32); | ||||||
|  |  | ||||||
|     const int ix = tiisg/2; |     const int ix = tiisg; | ||||||
|     const int il = tiisg%2; |  | ||||||
|  |  | ||||||
|     device const float * y4 = y + 32 * ix + 16 * il; |     device const float * y4 = y + 32 * ix; | ||||||
|  |  | ||||||
|     for (int ib32 = ix; ib32 < nb32; ib32 += 16) { |     for (int ib32 = ix; ib32 < nb32; ib32 += 32) { | ||||||
|  |  | ||||||
|         for (int i = 0; i < 16; ++i) { |         float sumy = 0; | ||||||
|  |         for (int i = 0; i < 32; ++i) { | ||||||
|             yl[i] = y4[i]; |             yl[i] = y4[i]; | ||||||
|  |             sumy += yl[i]; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         const int ibl = ib32 / (QK_K / 32); |         const int ibl = ib32 / (QK_K / 32); | ||||||
|         const int ib  = ib32 % (QK_K / 32); |         const int ib  = ib32 % (QK_K / 32); | ||||||
|  |  | ||||||
|         device const block_iq1_s * xr = x + ibl; |         device const block_iq1_s * xr = x + ibl; | ||||||
|         device const uint8_t * qs = xr->qs + 4 * ib + 2 * il; |         device const uint8_t  * qs = xr->qs + 4 * ib; | ||||||
|         device const uint8_t * sc = xr->scales + 2 * ib + il; |         device const uint16_t * qh = xr->qh + ib; | ||||||
|         device const half     * dh = &xr->d; |         device const half     * dh = &xr->d; | ||||||
|  |  | ||||||
|         for (int row = 0; row < N_DST; row++) { |         for (int row = 0; row < N_DST; row++) { | ||||||
|  |  | ||||||
|             constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); |             constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((qh[0] << 8) & 0x700))); | ||||||
|             constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); |             constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((qh[0] << 5) & 0x700))); | ||||||
|  |             constant uint8_t * grid3 = (constant uint8_t *)(iq1s_grid_gpu + (qs[2] | ((qh[0] << 2) & 0x700))); | ||||||
|  |             constant uint8_t * grid4 = (constant uint8_t *)(iq1s_grid_gpu + (qs[3] | ((qh[0] >> 1) & 0x700))); | ||||||
|  |  | ||||||
|             float2 sum = {0}; |             float sum = 0; | ||||||
|             for (int j = 0; j < 8; ++j) { |             for (int j = 0; j < 4; ++j) { | ||||||
|                 sum[0] += yl[j+ 0] * grid1[j]; |                 sum += yl[j+ 0] * (grid1[j] & 0xf) + yl[j+ 4] * (grid1[j] >> 4) | ||||||
|                 sum[1] += yl[j+ 8] * grid2[j]; |                      + yl[j+ 8] * (grid2[j] & 0xf) + yl[j+12] * (grid2[j] >> 4) | ||||||
|  |                      + yl[j+16] * (grid3[j] & 0xf) + yl[j+20] * (grid3[j] >> 4) | ||||||
|  |                      + yl[j+24] * (grid4[j] & 0xf) + yl[j+28] * (grid4[j] >> 4); | ||||||
|             } |             } | ||||||
|             sumf[row] += (float)dh[0] * (sum[0] * (2*(sc[0] & 7) + 1) + sum[1] * (2*((sc[0] >> 4) & 7) + 1)); |             sumf[row] += (float)dh[0] * (sum - sumy) * (2*(qh[0] >> 12) + 1); | ||||||
|  |  | ||||||
|             dh += nb*sizeof(block_iq1_s)/2; |             dh += nb*sizeof(block_iq1_s)/2; | ||||||
|             qs += nb*sizeof(block_iq1_s); |             qs += nb*sizeof(block_iq1_s); | ||||||
|             sc += nb*sizeof(block_iq1_s); |             qh += nb*sizeof(block_iq1_s)/2; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         y4 += 16 * 32; |         y4 += 32 * 32; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     for (int row = 0; row < N_DST; ++row) { |     for (int row = 0; row < N_DST; ++row) { | ||||||
| @@ -5066,16 +5071,19 @@ void dequantize_iq2_s(device const block_iq2_s * xb, short il, thread type4x4 & | |||||||
| template <typename type4x4> | template <typename type4x4> | ||||||
| void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 & reg) { | void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 & reg) { | ||||||
|     // il is 0...15 for QK_K = 256 => index of block of 32 is il/2 |     // il is 0...15 for QK_K = 256 => index of block of 32 is il/2 | ||||||
|  |     const int ib32 = il/2; | ||||||
|  |     il = il%2; | ||||||
|     const float d = xb->d; |     const float d = xb->d; | ||||||
|     device const uint8_t * qs = xb->qs + 2*il; |     device const uint8_t  * qs = xb->qs + 4*ib32 + 2*il; | ||||||
|     device const uint8_t * sc = xb->scales + il; |     device const uint16_t * qh = xb->qh; | ||||||
|     const float dl1 = d * (2*(sc[0] & 7) + 1); |     const float dl = d * (2*(qh[ib32] >> 12) + 1); | ||||||
|     const float dl2 = d * (2*((sc[0] >> 4) & 7) + 1); |     constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | (((qh[ib32] >> (6*il+0)) & 7) << 8))); | ||||||
|     constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); |     constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | (((qh[ib32] >> (6*il+3)) & 7) << 8))); | ||||||
|     constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); |     for (int i = 0; i < 4; ++i) { | ||||||
|     for (int i = 0; i < 8; ++i) { |         reg[0][i] = dl * (grid1[i] & 0xf) - dl; | ||||||
|         reg[i/4+0][i%4] = dl1 * grid1[i]; |         reg[1][i] = dl * (grid1[i] >>  4) - dl; | ||||||
|         reg[i/4+2][i%4] = dl2 * grid2[i]; |         reg[2][i] = dl * (grid2[i] & 0xf) - dl; | ||||||
|  |         reg[3][i] = dl * (grid2[i] >>  4) - dl; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										484
									
								
								ggml-quants.c
									
									
									
									
									
								
							
							
						
						
									
										484
									
								
								ggml-quants.c
									
									
									
									
									
								
							| @@ -3449,39 +3449,22 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in | |||||||
|     assert(k % QK_K == 0); |     assert(k % QK_K == 0); | ||||||
|     const int nb = k / QK_K; |     const int nb = k / QK_K; | ||||||
|  |  | ||||||
|     float db[4]; |  | ||||||
|     uint16_t idx[4]; |  | ||||||
|     //const int8_t * grid[4]; |  | ||||||
|  |  | ||||||
|     for (int i = 0; i < nb; i++) { |     for (int i = 0; i < nb; i++) { | ||||||
|  |  | ||||||
|         const float d = GGML_FP16_TO_FP32(x[i].d); |         const float d = GGML_FP16_TO_FP32(x[i].d); | ||||||
|         const uint8_t * sc = x[i].scales; |  | ||||||
|         const uint8_t  * qs = x[i].qs; |         const uint8_t  * qs = x[i].qs; | ||||||
|  |         const uint16_t * qh = x[i].qh; | ||||||
|  |  | ||||||
|         for (int i8 = 0; i8 < QK_K/8; i8 += 4) { |         for (int ib = 0; ib < QK_K/32; ++ib) { | ||||||
|             idx[0] = qs[0] | ((sc[0] & 0x08) << 5); |             const float dl = d * (2*(qh[ib] >> 12) + 1); | ||||||
|             idx[1] = qs[1] | ((sc[0] & 0x80) << 1); |  | ||||||
|             idx[2] = qs[2] | ((sc[1] & 0x08) << 5); |  | ||||||
|             idx[3] = qs[3] | ((sc[1] & 0x80) << 1); |  | ||||||
|             //grid[0] = (const int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); |  | ||||||
|             //grid[1] = (const int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); |  | ||||||
|             //grid[2] = (const int8_t *)(iq1s_grid + (qs[2] | ((sc[1] & 0x08) << 5))); |  | ||||||
|             //grid[3] = (const int8_t *)(iq1s_grid + (qs[3] | ((sc[1] & 0x80) << 1))); |  | ||||||
|             db[0] = d * (2*(sc[0] & 7) + 1); |  | ||||||
|             db[1] = d * (2*((sc[0] >> 4) & 7) + 1); |  | ||||||
|             db[2] = d * (2*(sc[1] & 7) + 1); |  | ||||||
|             db[3] = d * (2*((sc[1] >> 4) & 7) + 1); |  | ||||||
|             for (int l = 0; l < 4; ++l) { |             for (int l = 0; l < 4; ++l) { | ||||||
|                 const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]); |                 const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8))); | ||||||
|                 for (int j = 0; j < 8; ++j) { |                 for (int j = 0; j < 8; ++j) { | ||||||
|                     //y[j] = db[l] * grid[l][j]; |                     y[j] = dl * grid[j]; | ||||||
|                     y[j] = db[l] * grid[j]; |  | ||||||
|                 } |                 } | ||||||
|                 y += 8; |                 y += 8; | ||||||
|             } |             } | ||||||
|             qs += 4; |             qs += 4; | ||||||
|             sc += 2; |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -9587,113 +9570,72 @@ void ggml_vec_dot_iq1_s_q8_K  (int n, float * restrict s, size_t bs, const void | |||||||
|  |  | ||||||
|     const int nb = n / QK_K; |     const int nb = n / QK_K; | ||||||
|  |  | ||||||
|     // TODO: implement for QK_K = 64 | #if defined __ARM_NEON | ||||||
| #if defined __ARM_NEON && QK_K == 256 |  | ||||||
|  |  | ||||||
|     const uint8x16_t m8 = vdupq_n_u8(0x08); |     ggml_int8x16x4_t q1b; | ||||||
|     const uint8x16_t m7 = vdupq_n_u8(0x07); |  | ||||||
|     const uint8x16_t m1 = vdupq_n_u8(0x01); |  | ||||||
|     const int32x4_t vzero = vdupq_n_s32(0); |  | ||||||
|  |  | ||||||
|     uint16_t gindex[8]; |  | ||||||
|     uint16x8x2_t vindex; |  | ||||||
|     int8x16x4_t q1b; |  | ||||||
|     ggml_int8x16x4_t q8b; |     ggml_int8x16x4_t q8b; | ||||||
|     uint16x8x4_t scales; |  | ||||||
|     int32x4x2_t sumi; |  | ||||||
|     int32x4x2_t dotq; |  | ||||||
|  |  | ||||||
|     float sumf = 0; |     float sumf = 0; | ||||||
|     for (int i = 0; i < nb; ++i) { |     for (int i = 0; i < nb; ++i) { | ||||||
|  |  | ||||||
|         const int8_t   * q8 = y[i].qs; |         const int8_t   * q8 = y[i].qs; | ||||||
|         const uint8_t  * qs = x[i].qs; |         const uint8_t  * qs = x[i].qs; | ||||||
|         const uint8_t * sc = x[i].scales; |         const uint16_t * qh = x[i].qh; | ||||||
|  |  | ||||||
|         sumi.val[0] = sumi.val[1] = vzero; |         int sumi1 = 0, sumi2 = 0; | ||||||
|  |  | ||||||
|         for (int i128 = 0; i128 < QK_K/128; ++i128) { |         for (int ib = 0; ib < QK_K/32; ib += 2) { | ||||||
|             const uint8x16_t ql = vld1q_u8(qs); qs += 16; |  | ||||||
|             const uint8x8_t tm1 = vld1_u8 (sc); sc +=  8; |             q1b.val[0] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[0] | ((qh[ib+0] << 8) & 0x700)))), | ||||||
|             const uint8x8_t tm2 = vshr_n_u8(tm1, 4); |                                      vld1_s8((const int8_t *)(iq1s_grid + (qs[1] | ((qh[ib+0] << 5) & 0x700))))); | ||||||
|             const uint8x16_t qh = vcombine_u8(vzip1_u8(tm1, tm2), vzip2_u8(tm1, tm2)); |             q1b.val[1] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[2] | ((qh[ib+0] << 2) & 0x700)))), | ||||||
|             const uint8x16_t hbit = vandq_u8(qh, m8); |                                      vld1_s8((const int8_t *)(iq1s_grid + (qs[3] | ((qh[ib+0] >> 1) & 0x700))))); | ||||||
|             vindex.val[0] = vorrq_u16(vmovl_u8(vget_low_u8 (ql)), vshlq_n_u16(vmovl_u8(vget_low_u8 (hbit)), 5)); |             q1b.val[2] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[4] | ((qh[ib+1] << 8) & 0x700)))), | ||||||
|             vindex.val[1] = vorrq_u16(vmovl_u8(vget_high_u8(ql)), vshlq_n_u16(vmovl_u8(vget_high_u8(hbit)), 5)); |                                      vld1_s8((const int8_t *)(iq1s_grid + (qs[5] | ((qh[ib+1] << 5) & 0x700))))); | ||||||
|             const uint8x16_t scales8 = vorrq_u8(vshlq_n_u8(vandq_u8(qh, m7), 1), m1); |             q1b.val[3] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[6] | ((qh[ib+1] << 2) & 0x700)))), | ||||||
|             scales.val[0] = vmovl_u8(vget_low_u8 (scales8)); |                                      vld1_s8((const int8_t *)(iq1s_grid + (qs[7] | ((qh[ib+1] >> 1) & 0x700))))); | ||||||
|             scales.val[1] = vmovl_u8(vget_high_u8 (scales8)); |             qs += 8; | ||||||
|  |  | ||||||
|             for (int l = 0; l < 2; ++l) { |  | ||||||
|                 vst1q_u16(gindex+0, vindex.val[l]); |  | ||||||
|                 q1b.val[0] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[0])), vld1_s8((const void *)(iq1s_grid+gindex[1]))); |  | ||||||
|                 q1b.val[1] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[2])), vld1_s8((const void *)(iq1s_grid+gindex[3]))); |  | ||||||
|                 q1b.val[2] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[4])), vld1_s8((const void *)(iq1s_grid+gindex[5]))); |  | ||||||
|                 q1b.val[3] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[6])), vld1_s8((const void *)(iq1s_grid+gindex[7]))); |  | ||||||
|             q8b = ggml_vld1q_s8_x4(q8); q8 += 64; |             q8b = ggml_vld1q_s8_x4(q8); q8 += 64; | ||||||
|  |  | ||||||
|                 dotq.val[0] = vpaddq_s32(ggml_vdotq_s32(vzero, q1b.val[0], q8b.val[0]), ggml_vdotq_s32(vzero, q1b.val[1], q8b.val[1])); |             const int32x4_t p1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[0], q8b.val[0]), q1b.val[1], q8b.val[1]); | ||||||
|                 dotq.val[1] = vpaddq_s32(ggml_vdotq_s32(vzero, q1b.val[2], q8b.val[2]), ggml_vdotq_s32(vzero, q1b.val[3], q8b.val[3])); |             const int32x4_t p2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[2], q8b.val[2]), q1b.val[3], q8b.val[3]); | ||||||
|  |  | ||||||
|  |             sumi1 += vaddvq_s32(p1) * (2*(qh[ib+0] >> 12) + 1); | ||||||
|  |             sumi2 += vaddvq_s32(p2) * (2*(qh[ib+1] >> 12) + 1); | ||||||
|  |  | ||||||
|                 sumi.val[0] = vmlaq_s32(sumi.val[0], dotq.val[0], vreinterpretq_s32_u32(vmovl_u16(vget_low_u16 (scales.val[l])))); |  | ||||||
|                 sumi.val[1] = vmlaq_s32(sumi.val[1], dotq.val[1], vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(scales.val[l])))); |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * vaddvq_s32(vaddq_s32(sumi.val[0], sumi.val[1])); |         sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     *s = sumf; |     *s = sumf; | ||||||
|  |  | ||||||
|     // TODO: implement for QK_K = 64 | #elif defined __AVX2__ | ||||||
| #elif defined __AVX2__ && QK_K == 256 |  | ||||||
|  |  | ||||||
|     const __m128i m8 = _mm_set1_epi8(0x08); |  | ||||||
|     const __m128i m7 = _mm_set1_epi8(0x07); |  | ||||||
|     const __m128i m1 = _mm_set1_epi8(0x01); |  | ||||||
|     const __m128i shuffle_h = _mm_set_epi8(15, 7, 14, 6, 13, 5, 12, 4, 11, 3, 10, 2, 9, 1, 8, 0); |  | ||||||
|     const __m128i shuffle_s[4] = { |  | ||||||
|         _mm_set_epi32(0x03030303, 0x02020202, 0x01010101, 0x00000000), |  | ||||||
|         _mm_set_epi32(0x07070707, 0x06060606, 0x05050505, 0x04040404), |  | ||||||
|         _mm_set_epi32(0x0b0b0b0b, 0x0a0a0a0a, 0x09090909, 0x08080808), |  | ||||||
|         _mm_set_epi32(0x0f0f0f0f, 0x0e0e0e0e, 0x0d0d0d0d, 0x0c0c0c0c) |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     uint64_t aux64; |  | ||||||
|  |  | ||||||
|     typedef union m256i_uint16 { |  | ||||||
|         __m256i reg; |  | ||||||
|         uint16_t s[16]; |  | ||||||
|     } m256i_uint16_t; |  | ||||||
|  |  | ||||||
|     m256i_uint16_t v_gindex; |  | ||||||
|  |  | ||||||
|     __m256 accum = _mm256_setzero_ps(); |     __m256 accum = _mm256_setzero_ps(); | ||||||
|     for (int i = 0; i < nb; ++i) { |     for (int i = 0; i < nb; ++i) { | ||||||
|  |  | ||||||
|         const int8_t   * q8 = y[i].qs; |         const int8_t   * q8 = y[i].qs; | ||||||
|         const uint8_t  * qs = x[i].qs; |         const uint8_t  * qs = x[i].qs; | ||||||
|         const uint8_t * sc = x[i].scales; |         const uint16_t * qh = x[i].qh; | ||||||
|  |  | ||||||
|         __m256i sumi = _mm256_setzero_si256(); |         __m256i sumi = _mm256_setzero_si256(); | ||||||
|         for (int i128 = 0; i128 < QK_K/128; ++i128) { |         for (int ib = 0; ib < QK_K/32; ib += 2) { | ||||||
|             const __m128i ql = _mm_loadu_si128((const __m128i*)qs); qs += 16; |             const __m256i q1b_1 = _mm256_set_epi64x(iq1s_grid[qs[3] | ((qh[ib+0] >> 1) & 0x700)], iq1s_grid[qs[2] | ((qh[ib+0] << 2) & 0x700)], | ||||||
|             memcpy(&aux64, sc, 8); sc += 8; |                                                     iq1s_grid[qs[1] | ((qh[ib+0] << 5) & 0x700)], iq1s_grid[qs[0] | ((qh[ib+0] << 8) & 0x700)]); | ||||||
|             const __m128i qh = _mm_shuffle_epi8(_mm_set_epi64x(aux64 >> 4, aux64), shuffle_h); |             const __m256i q1b_2 = _mm256_set_epi64x(iq1s_grid[qs[7] | ((qh[ib+1] >> 1) & 0x700)], iq1s_grid[qs[6] | ((qh[ib+1] << 2) & 0x700)], | ||||||
|             const __m256i hbit = _mm256_cvtepu8_epi16(_mm_and_si128(qh, m8)); |                                                     iq1s_grid[qs[5] | ((qh[ib+1] << 5) & 0x700)], iq1s_grid[qs[4] | ((qh[ib+1] << 8) & 0x700)]); | ||||||
|             v_gindex.reg = _mm256_or_si256(_mm256_cvtepu8_epi16(ql), _mm256_slli_epi16(hbit, 5)); |             qs += 8; | ||||||
|             const __m128i scales = _mm_or_si128(_mm_slli_epi16(_mm_and_si128(qh, m7), 1), m1); |             const __m256i q8b_1 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; | ||||||
|  |             const __m256i q8b_2 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; | ||||||
|  |  | ||||||
|             for (int i32 = 0; i32 < 4; ++i32) { |             const __m256i dot1 = mul_add_epi8(q1b_1, q8b_1); | ||||||
|                 const __m256i q8b = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; |             const __m256i dot2 = mul_add_epi8(q1b_2, q8b_2); | ||||||
|                 const __m256i q1b = _mm256_set_epi64x(iq1s_grid[v_gindex.s[4*i32+3]], iq1s_grid[v_gindex.s[4*i32+2]], |             const __m256i p1 = _mm256_madd_epi16(dot1, _mm256_set1_epi16(2*(qh[ib+0] >> 12) + 1)); | ||||||
|                                                       iq1s_grid[v_gindex.s[4*i32+1]], iq1s_grid[v_gindex.s[4*i32+0]]); |             const __m256i p2 = _mm256_madd_epi16(dot2, _mm256_set1_epi16(2*(qh[ib+1] >> 12) + 1)); | ||||||
|                 const __m256i dot = mul_add_epi8(q1b, q8b); |  | ||||||
|                 const __m256i s16 = _mm256_cvtepi8_epi16(_mm_shuffle_epi8(scales, shuffle_s[i32])); |  | ||||||
|                 const __m256i p   = _mm256_madd_epi16(s16, dot); |  | ||||||
|                 sumi = _mm256_add_epi32(sumi, p); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|  |             sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p1, p2)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         accum = _mm256_fmadd_ps(_mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d)), _mm256_cvtepi32_ps(sumi), accum); |         accum = _mm256_fmadd_ps(_mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d)), _mm256_cvtepi32_ps(sumi), accum); | ||||||
| @@ -9704,35 +9646,26 @@ void ggml_vec_dot_iq1_s_q8_K  (int n, float * restrict s, size_t bs, const void | |||||||
|  |  | ||||||
| #else | #else | ||||||
|  |  | ||||||
|     int db[4]; |  | ||||||
|     uint16_t idx[4]; |  | ||||||
|  |  | ||||||
|     float sumf = 0; |     float sumf = 0; | ||||||
|     for (int i = 0; i < nb; ++i) { |     for (int i = 0; i < nb; i++) { | ||||||
|  |  | ||||||
|         const int8_t   * q8 = y[i].qs; |         const int8_t   * q8 = y[i].qs; | ||||||
|         const uint8_t  * qs = x[i].qs; |         const uint8_t  * qs = x[i].qs; | ||||||
|         const uint8_t * sc = x[i].scales; |         const uint16_t * qh = x[i].qh; | ||||||
|  |  | ||||||
|         int sumi = 0; |         int sumi = 0; | ||||||
|         for (int i32 = 0; i32 < QK_K/32; ++i32) { |         for (int ib = 0; ib < QK_K/32; ++ib) { | ||||||
|             idx[0] = qs[0] | ((sc[0] & 0x08) << 5); |             const int ls = 2*(qh[ib] >> 12) + 1; | ||||||
|             idx[1] = qs[1] | ((sc[0] & 0x80) << 1); |             int lsum = 0; | ||||||
|             idx[2] = qs[2] | ((sc[1] & 0x08) << 5); |  | ||||||
|             idx[3] = qs[3] | ((sc[1] & 0x80) << 1); |  | ||||||
|             db[0] = (2*(sc[0] & 7) + 1); |  | ||||||
|             db[1] = (2*((sc[0] >> 4) & 7) + 1); |  | ||||||
|             db[2] = (2*(sc[1] & 7) + 1); |  | ||||||
|             db[3] = (2*((sc[1] >> 4) & 7) + 1); |  | ||||||
|             for (int l = 0; l < 4; ++l) { |             for (int l = 0; l < 4; ++l) { | ||||||
|                 const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]); |                 const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8))); | ||||||
|                 int suml = 0; |                 for (int j = 0; j < 8; ++j) { | ||||||
|                 for (int j = 0; j < 8; ++j) suml += q8[j] * grid[j]; |                     lsum += q8[j] * grid[j]; | ||||||
|                 sumi += db[l] * suml; |                 } | ||||||
|                 q8 += 8; |                 q8 += 8; | ||||||
|             } |             } | ||||||
|  |             sumi += ls * lsum; | ||||||
|             qs += 4; |             qs += 4; | ||||||
|             sc += 2; |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * sumi; |         sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * sumi; | ||||||
| @@ -9996,7 +9929,7 @@ static inline int iq2_grid_size(enum ggml_type type) { | |||||||
|     GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ2_S); |     GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ2_S); | ||||||
|     return type == GGML_TYPE_IQ2_XXS ? 256 : |     return type == GGML_TYPE_IQ2_XXS ? 256 : | ||||||
|            type == GGML_TYPE_IQ2_XS  ? 512 : |            type == GGML_TYPE_IQ2_XS  ? 512 : | ||||||
|            type == GGML_TYPE_IQ1_S   ? 512 : 1024; |            type == GGML_TYPE_IQ1_S   ? NGRID_IQ1S : 1024; | ||||||
| } | } | ||||||
|  |  | ||||||
| static int iq2_compare_func(const void * left, const void * right) { | static int iq2_compare_func(const void * left, const void * right) { | ||||||
| @@ -10063,39 +9996,135 @@ void iq2xs_init_impl(enum ggml_type type) { | |||||||
|         40962, 40968, 40970, 40992, 41002, 41120, 41297, 41305, 41382, 41472, 41474, 41480, 41514, 41600, 41632, 42048, |         40962, 40968, 40970, 40992, 41002, 41120, 41297, 41305, 41382, 41472, 41474, 41480, 41514, 41600, 41632, 42048, | ||||||
|         42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690, |         42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690, | ||||||
|     }; |     }; | ||||||
|     static const uint16_t kgrid_1bit_512[512] = { |     static const uint16_t kgrid_1bit_2048[NGRID_IQ1S] = { | ||||||
|            10,    33,    41,    85,   132,   134,   160,   162,   277,   337,   340,   345,   357,   405,   516,   545, |             0,     2,     5,     8,    10,    17,    21,    32,    34,    40,    42,    69,    81,    84,    86,   101, | ||||||
|           553,   598,   641,   650,   681,  1042,  1044,  1097,  1169,  1176,  1320,  1345,  1365,  1378,  1434,  1444, |           128,   130,   136,   138,   149,   160,   162,   168,   170,   260,   261,   273,   276,   278,   281,   282, | ||||||
|          1545,  1617,  1642,  1685,  2053,  2080,  2089,  2133,  2176,  2182,  2208,  2214,  2306,  2384,  2393,  2440, |           293,   321,   326,   329,   338,   341,   346,   353,   356,   358,   360,   389,   401,   404,   406,   421, | ||||||
|          2453,  2581,  2664,  2690,  2721,  4117,  4161,  4182,  4184,  4261,  4357,  4369,  4372,  4377,  4390,  4422, |           512,   514,   520,   522,   533,   544,   546,   552,   554,   581,   593,   601,   612,   617,   640,   642, | ||||||
|          4432,  4437,  4449,  4457,  4485,  4497,  4505,  4629,  4677,  4696,  4774,  5205,  5217,  5225,  5386,  5397, |           648,   650,   657,   661,   665,   672,   674,   680,   682,  1041,  1044,  1046,  1061,  1089,  1097,  1109, | ||||||
|          5409,  5445,  5457,  5460,  5461,  5462,  5465,  5472,  5477,  5525,  5545,  5650,  5668,  5717,  5729,  5769, |          1114,  1124,  1125,  1169,  1177,  1189,  1281,  1284,  1285,  1286,  1301,  1304,  1306,  1321,  1344,  1349, | ||||||
|          5777,  6212,  6234,  6244,  6293,  6424,  6482,  6485,  6502,  6505,  6529,  6538,  6565,  6656,  6682,  6788, |          1354,  1360,  1361,  1364,  1365,  1366,  1369,  1376,  1378,  1381,  1384,  1386,  1409,  1425,  1429,  1432, | ||||||
|          6806,  6820,  8218,  8224,  8226,  8232,  8277,  8326,  8354,  8469,  8521,  8530,  8549,  8596,  8737,  8794, |          1434,  1441,  1444,  1445,  1446,  1449,  1556,  1561,  1601,  1604,  1616,  1618,  1621,  1624,  1632,  1633, | ||||||
|          9221,  9253,  9348,  9369,  9380,  9474,  9557,  9633,  9732,  9753,  9793,  9830,  9862,  9880, 10240, 10272, |          1638,  1641,  1669,  1681,  1684,  1689,  2048,  2050,  2056,  2058,  2069,  2080,  2082,  2088,  2090,  2117, | ||||||
|         10282, 10321, 10406, 10517, 10530, 10566, 10585, 10645, 10896, 16466, 16468, 16473, 16485, 16646, 16660, 16665, |          2129,  2134,  2149,  2176,  2178,  2184,  2186,  2197,  2208,  2210,  2216,  2218,  2309,  2321,  2324,  2329, | ||||||
|         16725, 16793, 16806, 16914, 16969, 16977, 16996, 17028, 17057, 17408, 17416, 17434, 17493, 17512, 17578, 17685, |          2340,  2341,  2369,  2384,  2385,  2389,  2401,  2404,  2409,  2449,  2452,  2454,  2457,  2469,  2560,  2562, | ||||||
|         17696, 17733, 17745, 17748, 17749, 17750, 17753, 17765, 17794, 17813, 17946, 17984, 18005, 18072, 18453, 18529, |          2568,  2570,  2581,  2592,  2594,  2600,  2602,  2629,  2641,  2649,  2657,  2661,  2688,  2690,  2693,  2696, | ||||||
|         18569, 18722, 18756, 18762, 18773, 18794, 18833, 18853, 18945, 19026, 19033, 19077, 20489, 20497, 20500, 20517, |          2698,  2709,  2720,  2722,  2728,  2730,  4112,  4113,  4116,  4121,  4132,  4133,  4161,  4164,  4176,  4181, | ||||||
|         20565, 20586, 20610, 20633, 20757, 20769, 20776, 20805, 20817, 20820, 20821, 20822, 20825, 20837, 20864, 20872, |          4184,  4193,  4196,  4197,  4201,  4241,  4244,  4246,  4257,  4261,  4353,  4356,  4358,  4361,  4368,  4370, | ||||||
|         20885, 20896, 21002, 21029, 21077, 21146, 21510, 21525, 21573, 21585, 21588, 21589, 21590, 21593, 21605, 21653, |          4373,  4376,  4385,  4388,  4393,  4421,  4426,  4432,  4433,  4434,  4436,  4437,  4438,  4441,  4448,  4453, | ||||||
|         21665, 21765, 21777, 21780, 21781, 21782, 21785, 21797, 21825, 21828, 21829, 21830, 21833, 21840, 21841, 21842, |          4484,  4498,  4501,  4513,  4516,  4625,  4628,  4630,  4645,  4672,  4678,  4681,  4690,  4693,  4696,  4698, | ||||||
|         21844, 21846, 21848, 21849, 21850, 21857, 21860, 21861, 21862, 21865, 21893, 21905, 21908, 21909, 21910, 21913, |          4708,  4710,  4741,  4753,  4756,  4758,  4773,  5121,  5126,  5129,  5140,  5141,  5144,  5145,  5153,  5158, | ||||||
|         21925, 22024, 22037, 22085, 22097, 22100, 22101, 22102, 22105, 22117, 22165, 22545, 22566, 22568, 22594, 22608, |          5185,  5189,  5190,  5192,  5194,  5201,  5204,  5205,  5206,  5209,  5218,  5221,  5224,  5252,  5257,  5264, | ||||||
|         22613, 22676, 22697, 22793, 22805, 22853, 22865, 22868, 22869, 22870, 22873, 22885, 22933, 22946, 23046, 23072, |          5268,  5269,  5272,  5273,  5274,  5281,  5284,  5285,  5289,  5378,  5381,  5386,  5393,  5396,  5397,  5398, | ||||||
|         23125, 23209, 24597, 24640, 24665, 24673, 24725, 24833, 24840, 24869, 24917, 24934, 24965, 25001, 25108, 25110, |          5401,  5408,  5410,  5413,  5416,  5418,  5441,  5444,  5445,  5446,  5457,  5458,  5460,  5461,  5462,  5465, | ||||||
|         25152, 25184, 25192, 25234, 25616, 25618, 25625, 25685, 25704, 25738, 25744, 25770, 25877, 25897, 25925, 25937, |          5466,  5473,  5476,  5477,  5478,  5481,  5504,  5506,  5508,  5509,  5512,  5514,  5520,  5521,  5524,  5525, | ||||||
|         25940, 25941, 25942, 25945, 25957, 25986, 26005, 26186, 26197, 26276, 26632, 26634, 26725, 26757, 26770, 26885, |          5526,  5529,  5530,  5536,  5538,  5541,  5633,  5636,  5637,  5638,  5653,  5654,  5656,  5658,  5665,  5670, | ||||||
|         26965, 26976, 26986, 27032, 27153, 27174, 27200, 27208, 27240, 27269, 27282, 27290, 32778, 32800, 32802, 32808, |          5696,  5698,  5700,  5701,  5704,  5706,  5713,  5717,  5718,  5720,  5721,  5729,  5732,  5733,  5736,  5737, | ||||||
|         32810, 32853, 32904, 32922, 32930, 32932, 33105, 33110, 33112, 33125, 33157, 33280, 33288, 33301, 33312, 33320, |          5738,  5766,  5770,  5778,  5781,  5796,  5801,  6161,  6166,  6181,  6209,  6212,  6214,  6217,  6224,  6229, | ||||||
|         33424, 33797, 33829, 33858, 34068, 34133, 34146, 34176, 34217, 34306, 34342, 34441, 34454, 34468, 34832, 34918, |          6232,  6234,  6240,  6241,  6244,  6246,  6249,  6277,  6289,  6292,  6309,  6416,  6418,  6421,  6426,  6433, | ||||||
|         34965, 34984, 35094, 35137, 35161, 35208, 35232, 35332, 35338, 35368, 35429, 36932, 36934, 36953, 37009, 37125, |          6437,  6466,  6468,  6469,  6472,  6481,  6484,  6485,  6486,  6489,  6490,  6496,  6501,  6506,  6537,  6545, | ||||||
|         37136, 37138, 37145, 37157, 37205, 37220, 37258, 37290, 37444, 37446, 37465, 37478, 37525, 37905, 37968, 37973, |          6546,  6549,  6552,  6561,  6566,  6569,  6665,  6678,  6692,  6694,  6724,  6726,  6729,  6736,  6738,  6741, | ||||||
|         38040, 38054, 38145, 38154, 38165, 38180, 38186, 38213, 38225, 38228, 38229, 38230, 38233, 38245, 38293, 38485, |          6744,  6753,  6758,  6761,  6789,  6801,  6806,  6810,  8192,  8194,  8200,  8202,  8213,  8224,  8226,  8229, | ||||||
|         38504, 38530, 38938, 38985, 38993, 39012, 39040, 39173, 39192, 39253, 39265, 39301, 39316, 39322, 39442, 39497, |          8232,  8234,  8261,  8273,  8281,  8289,  8293,  8320,  8322,  8328,  8330,  8341,  8352,  8354,  8357,  8360, | ||||||
|         39504, 39590, 40970, 40984, 40992, 41002, 41045, 41120, 41128, 41237, 41289, 41297, 41317, 41364, 41366, 41514, |          8362,  8453,  8465,  8468,  8473,  8485,  8514,  8516,  8521,  8533,  8536,  8538,  8545,  8548,  8549,  8550, | ||||||
|         41557, 41633, 41989, 42021, 42056, 42068, 42074, 42113, 42242, 42265, 42274, 42325, 42340, 42402, 42501, 42512, |          8581,  8592,  8598,  8601,  8613,  8705,  8712,  8714,  8721,  8725,  8736,  8738,  8744,  8746,  8773,  8785, | ||||||
|         42533, 42624, 42632, 42666, 43040, 43093, 43106, 43168, 43176, 43264, 43286, 43345, 43429, 43590, 43618, 43680, |          8790,  8793,  8805,  8833,  8840,  8842,  8849,  8853,  8864,  8866,  8872,  8874,  9221,  9236,  9238,  9241, | ||||||
|  |          9253,  9284,  9285,  9286,  9289,  9298,  9301,  9304,  9306,  9318,  9349,  9361,  9364,  9369,  9377,  9381, | ||||||
|  |          9481,  9493,  9505,  9513,  9536,  9541,  9544,  9553,  9556,  9557,  9561,  9570,  9573,  9576,  9609,  9616, | ||||||
|  |          9620,  9621,  9624,  9626,  9633,  9636,  9638,  9641,  9733,  9744,  9746,  9753,  9765,  9793,  9801,  9813, | ||||||
|  |          9824,  9825,  9833,  9860,  9862,  9872,  9882, 10240, 10242, 10248, 10250, 10261, 10272, 10274, 10280, 10282, | ||||||
|  |         10309, 10321, 10324, 10341, 10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410, 10505, 10513, 10516, 10521, | ||||||
|  |         10533, 10566, 10569, 10578, 10581, 10593, 10596, 10598, 10601, 10629, 10640, 10646, 10649, 10660, 10661, 10752, | ||||||
|  |         10754, 10760, 10762, 10784, 10786, 10792, 10794, 10821, 10833, 10838, 10841, 10853, 10880, 10882, 10888, 10890, | ||||||
|  |         10901, 10912, 10914, 10920, 10922, 16389, 16401, 16406, 16421, 16457, 16466, 16469, 16472, 16474, 16481, 16484, | ||||||
|  |         16486, 16532, 16537, 16545, 16550, 16640, 16641, 16644, 16646, 16649, 16658, 16661, 16662, 16664, 16666, 16673, | ||||||
|  |         16678, 16681, 16709, 16712, 16714, 16721, 16724, 16725, 16726, 16729, 16730, 16741, 16744, 16746, 16769, 16772, | ||||||
|  |         16774, 16784, 16786, 16789, 16800, 16801, 16802, 16901, 16913, 16916, 16918, 16933, 16961, 16978, 16981, 16986, | ||||||
|  |         16996, 17001, 17033, 17044, 17061, 17409, 17429, 17433, 17449, 17477, 17480, 17482, 17489, 17492, 17493, 17494, | ||||||
|  |         17505, 17506, 17509, 17512, 17514, 17537, 17542, 17545, 17552, 17554, 17557, 17568, 17569, 17577, 17665, 17666, | ||||||
|  |         17669, 17674, 17681, 17684, 17685, 17686, 17689, 17696, 17701, 17706, 17729, 17732, 17733, 17734, 17737, 17744, | ||||||
|  |         17745, 17748, 17749, 17750, 17752, 17753, 17761, 17764, 17765, 17766, 17769, 17794, 17796, 17797, 17800, 17809, | ||||||
|  |         17812, 17813, 17814, 17817, 17818, 17829, 17832, 17834, 17921, 17925, 17929, 17940, 17941, 17944, 17946, 17953, | ||||||
|  |         17956, 17961, 17984, 17986, 17989, 17992, 18000, 18001, 18002, 18005, 18006, 18009, 18018, 18021, 18024, 18049, | ||||||
|  |         18053, 18058, 18068, 18069, 18081, 18084, 18086, 18437, 18449, 18453, 18458, 18469, 18498, 18505, 18512, 18517, | ||||||
|  |         18520, 18529, 18532, 18534, 18537, 18565, 18577, 18580, 18582, 18585, 18597, 18689, 18693, 18694, 18698, 18704, | ||||||
|  |         18708, 18709, 18712, 18721, 18724, 18726, 18752, 18757, 18762, 18769, 18770, 18772, 18773, 18774, 18777, 18784, | ||||||
|  |         18786, 18789, 18790, 18794, 18822, 18825, 18834, 18837, 18838, 18840, 18849, 18852, 18854, 18857, 18966, 19012, | ||||||
|  |         19014, 19017, 19029, 19032, 19034, 19044, 19049, 19092, 19109, 20481, 20484, 20485, 20486, 20489, 20498, 20501, | ||||||
|  |         20506, 20513, 20516, 20521, 20544, 20549, 20552, 20561, 20564, 20565, 20566, 20569, 20581, 20584, 20614, 20617, | ||||||
|  |         20629, 20632, 20640, 20641, 20646, 20649, 20741, 20744, 20745, 20746, 20753, 20756, 20757, 20758, 20760, 20761, | ||||||
|  |         20768, 20773, 20774, 20776, 20778, 20801, 20804, 20805, 20806, 20809, 20816, 20817, 20818, 20820, 20821, 20822, | ||||||
|  |         20824, 20825, 20826, 20833, 20836, 20837, 20838, 20841, 20866, 20869, 20881, 20884, 20885, 20886, 20889, 20896, | ||||||
|  |         20901, 20906, 20993, 20998, 21010, 21013, 21018, 21025, 21028, 21058, 21061, 21066, 21073, 21076, 21077, 21078, | ||||||
|  |         21081, 21090, 21093, 21125, 21136, 21138, 21141, 21145, 21146, 21156, 21508, 21509, 21521, 21524, 21525, 21526, | ||||||
|  |         21528, 21529, 21537, 21541, 21544, 21546, 21569, 21572, 21573, 21574, 21577, 21578, 21584, 21585, 21588, 21589, | ||||||
|  |         21590, 21592, 21593, 21594, 21601, 21602, 21604, 21605, 21606, 21609, 21632, 21640, 21642, 21649, 21652, 21653, | ||||||
|  |         21654, 21657, 21665, 21668, 21669, 21674, 21761, 21762, 21764, 21765, 21766, 21769, 21776, 21777, 21778, 21780, | ||||||
|  |         21781, 21782, 21785, 21786, 21793, 21796, 21797, 21798, 21801, 21824, 21825, 21826, 21828, 21829, 21830, 21832, | ||||||
|  |         21833, 21840, 21841, 21842, 21844, 21845, 21846, 21848, 21849, 21850, 21856, 21857, 21860, 21861, 21862, 21864, | ||||||
|  |         21865, 21866, 21889, 21892, 21893, 21897, 21898, 21904, 21905, 21908, 21909, 21910, 21912, 21913, 21921, 21924, | ||||||
|  |         21925, 21926, 21929, 22016, 22017, 22018, 22020, 22022, 22024, 22025, 22033, 22036, 22037, 22040, 22041, 22048, | ||||||
|  |         22049, 22050, 22052, 22053, 22054, 22056, 22057, 22081, 22085, 22086, 22088, 22089, 22090, 22096, 22097, 22098, | ||||||
|  |         22100, 22101, 22102, 22104, 22105, 22106, 22113, 22116, 22117, 22121, 22146, 22149, 22150, 22152, 22153, 22154, | ||||||
|  |         22161, 22165, 22170, 22178, 22181, 22182, 22184, 22185, 22532, 22533, 22534, 22537, 22544, 22549, 22552, 22561, | ||||||
|  |         22570, 22597, 22600, 22602, 22609, 22612, 22613, 22614, 22616, 22617, 22624, 22626, 22628, 22629, 22658, 22665, | ||||||
|  |         22672, 22674, 22677, 22680, 22689, 22697, 22785, 22786, 22789, 22794, 22801, 22804, 22805, 22806, 22809, 22821, | ||||||
|  |         22849, 22852, 22853, 22854, 22857, 22864, 22865, 22866, 22868, 22869, 22870, 22872, 22873, 22874, 22881, 22884, | ||||||
|  |         22885, 22886, 22889, 22913, 22917, 22921, 22929, 22932, 22933, 22934, 22936, 22937, 22949, 23044, 23048, 23061, | ||||||
|  |         23066, 23072, 23077, 23078, 23081, 23109, 23112, 23113, 23121, 23125, 23126, 23128, 23129, 23138, 23141, 23144, | ||||||
|  |         23146, 23169, 23178, 23186, 23189, 23190, 23192, 23194, 23201, 24581, 24596, 24598, 24601, 24613, 24644, 24656, | ||||||
|  |         24661, 24662, 24664, 24666, 24673, 24676, 24678, 24681, 24705, 24726, 24741, 24833, 24836, 24838, 24841, 24850, | ||||||
|  |         24853, 24865, 24866, 24870, 24873, 24901, 24905, 24913, 24917, 24918, 24921, 24933, 24934, 24938, 24964, 24970, | ||||||
|  |         24978, 24981, 24993, 24998, 25001, 25105, 25110, 25113, 25152, 25153, 25158, 25173, 25174, 25176, 25184, 25221, | ||||||
|  |         25233, 25238, 25253, 25617, 25618, 25621, 25622, 25626, 25633, 25638, 25641, 25664, 25666, 25669, 25672, 25674, | ||||||
|  |         25681, 25684, 25685, 25686, 25689, 25690, 25696, 25698, 25701, 25732, 25733, 25737, 25744, 25746, 25748, 25749, | ||||||
|  |         25750, 25752, 25754, 25761, 25764, 25769, 25861, 25864, 25866, 25873, 25877, 25878, 25881, 25924, 25925, 25926, | ||||||
|  |         25929, 25936, 25937, 25940, 25941, 25942, 25945, 25953, 25956, 25957, 25958, 25961, 25990, 25993, 25994, 26001, | ||||||
|  |         26005, 26006, 26009, 26010, 26018, 26021, 26022, 26024, 26114, 26121, 26133, 26144, 26150, 26152, 26153, 26176, | ||||||
|  |         26181, 26184, 26186, 26193, 26196, 26197, 26198, 26200, 26202, 26208, 26213, 26216, 26240, 26242, 26245, 26250, | ||||||
|  |         26260, 26262, 26264, 26265, 26272, 26276, 26278, 26282, 26646, 26649, 26661, 26689, 26706, 26709, 26714, 26721, | ||||||
|  |         26729, 26757, 26769, 26776, 26790, 26881, 26884, 26896, 26901, 26913, 26916, 26918, 26921, 26944, 26945, 26949, | ||||||
|  |         26950, 26952, 26961, 26964, 26965, 26966, 26969, 26976, 26981, 26986, 27010, 27012, 27018, 27029, 27041, 27044, | ||||||
|  |         27045, 27049, 27153, 27158, 27160, 27201, 27204, 27209, 27216, 27221, 27224, 27226, 27236, 27237, 27241, 27270, | ||||||
|  |         27284, 27288, 27290, 27302, 32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810, 32837, 32848, 32849, 32852, | ||||||
|  |         32854, 32857, 32869, 32896, 32898, 32904, 32906, 32917, 32928, 32930, 32936, 32938, 33029, 33041, 33044, 33046, | ||||||
|  |         33049, 33061, 33089, 33092, 33097, 33104, 33106, 33109, 33110, 33112, 33113, 33124, 33126, 33129, 33157, 33161, | ||||||
|  |         33172, 33174, 33177, 33189, 33280, 33282, 33288, 33290, 33301, 33312, 33314, 33320, 33322, 33361, 33364, 33369, | ||||||
|  |         33381, 33408, 33410, 33416, 33418, 33429, 33440, 33442, 33448, 33450, 33812, 33817, 33857, 33860, 33873, 33877, | ||||||
|  |         33882, 33889, 33892, 33897, 33940, 33945, 34049, 34057, 34066, 34069, 34074, 34086, 34089, 34112, 34113, 34117, | ||||||
|  |         34120, 34129, 34132, 34133, 34134, 34137, 34138, 34149, 34150, 34152, 34154, 34177, 34180, 34182, 34185, 34192, | ||||||
|  |         34194, 34197, 34200, 34214, 34321, 34326, 34329, 34341, 34369, 34372, 34377, 34378, 34384, 34389, 34393, 34394, | ||||||
|  |         34401, 34406, 34410, 34437, 34449, 34458, 34468, 34816, 34818, 34824, 34826, 34837, 34848, 34850, 34856, 34858, | ||||||
|  |         34881, 34885, 34897, 34900, 34905, 34917, 34921, 34944, 34946, 34952, 34954, 34965, 34976, 34978, 34984, 34986, | ||||||
|  |         35077, 35078, 35089, 35092, 35094, 35109, 35137, 35140, 35142, 35145, 35152, 35154, 35157, 35162, 35169, 35172, | ||||||
|  |         35205, 35222, 35225, 35237, 35328, 35330, 35336, 35338, 35349, 35360, 35362, 35368, 35370, 35397, 35409, 35412, | ||||||
|  |         35414, 35456, 35458, 35464, 35466, 35477, 35488, 35490, 35496, 35498, 36869, 36881, 36886, 36888, 36889, 36901, | ||||||
|  |         36929, 36934, 36937, 36949, 36952, 36954, 36969, 36970, 36997, 37009, 37012, 37014, 37017, 37029, 37121, 37124, | ||||||
|  |         37126, 37129, 37136, 37141, 37144, 37146, 37153, 37156, 37158, 37161, 37184, 37189, 37200, 37201, 37204, 37205, | ||||||
|  |         37206, 37209, 37218, 37221, 37252, 37254, 37266, 37269, 37272, 37281, 37284, 37286, 37289, 37381, 37393, 37396, | ||||||
|  |         37401, 37413, 37444, 37446, 37449, 37456, 37458, 37461, 37464, 37478, 37481, 37509, 37524, 37526, 37545, 37889, | ||||||
|  |         37892, 37894, 37904, 37909, 37912, 37926, 37952, 37962, 37969, 37972, 37973, 37974, 37976, 37977, 37984, 37985, | ||||||
|  |         37986, 37989, 38020, 38022, 38034, 38036, 38037, 38040, 38049, 38057, 38144, 38149, 38152, 38154, 38160, 38161, | ||||||
|  |         38164, 38165, 38166, 38169, 38177, 38181, 38185, 38186, 38209, 38212, 38213, 38214, 38217, 38224, 38225, 38226, | ||||||
|  |         38228, 38229, 38230, 38232, 38233, 38234, 38241, 38244, 38245, 38246, 38249, 38273, 38277, 38280, 38289, 38290, | ||||||
|  |         38292, 38293, 38294, 38297, 38298, 38304, 38306, 38309, 38312, 38314, 38401, 38404, 38416, 38421, 38425, 38432, | ||||||
|  |         38438, 38441, 38469, 38472, 38473, 38481, 38482, 38485, 38486, 38489, 38501, 38504, 38530, 38532, 38537, 38538, | ||||||
|  |         38546, 38548, 38549, 38564, 38566, 38569, 38917, 38934, 38937, 38949, 38977, 38982, 38992, 38994, 38997, 38998, | ||||||
|  |         39002, 39012, 39013, 39045, 39057, 39062, 39065, 39077, 39172, 39174, 39177, 39184, 39186, 39189, 39192, 39194, | ||||||
|  |         39200, 39201, 39204, 39206, 39232, 39234, 39237, 39240, 39242, 39249, 39252, 39253, 39254, 39257, 39266, 39269, | ||||||
|  |         39270, 39274, 39297, 39300, 39312, 39314, 39317, 39322, 39329, 39334, 39429, 39445, 39461, 39492, 39494, 39497, | ||||||
|  |         39504, 39509, 39512, 39521, 39557, 39569, 39572, 39573, 39574, 40960, 40962, 40968, 40970, 40981, 40992, 40994, | ||||||
|  |         41000, 41002, 41029, 41041, 41044, 41046, 41049, 41088, 41090, 41096, 41098, 41109, 41120, 41122, 41128, 41130, | ||||||
|  |         41221, 41225, 41233, 41236, 41238, 41241, 41242, 41286, 41289, 41297, 41301, 41304, 41306, 41313, 41316, 41349, | ||||||
|  |         41360, 41362, 41366, 41369, 41474, 41480, 41482, 41488, 41497, 41506, 41512, 41514, 41541, 41553, 41558, 41561, | ||||||
|  |         41573, 41600, 41602, 41608, 41610, 41621, 41632, 41634, 41640, 41642, 42009, 42021, 42049, 42052, 42064, 42068, | ||||||
|  |         42069, 42072, 42074, 42081, 42085, 42086, 42088, 42089, 42117, 42246, 42249, 42256, 42258, 42261, 42264, 42278, | ||||||
|  |         42281, 42306, 42309, 42321, 42324, 42325, 42326, 42329, 42341, 42346, 42369, 42372, 42373, 42374, 42377, 42386, | ||||||
|  |         42389, 42392, 42501, 42513, 42518, 42522, 42529, 42533, 42564, 42566, 42570, 42578, 42581, 42582, 42584, 42592, | ||||||
|  |         42594, 42630, 42640, 42645, 42646, 42649, 42657, 42660, 42662, 43008, 43010, 43016, 43018, 43040, 43042, 43048, | ||||||
|  |         43050, 43089, 43092, 43094, 43097, 43136, 43138, 43144, 43146, 43157, 43168, 43170, 43176, 43178, 43269, 43284, | ||||||
|  |         43289, 43297, 43301, 43329, 43344, 43349, 43354, 43361, 43366, 43369, 43408, 43414, 43520, 43522, 43528, 43530, | ||||||
|  |         43552, 43554, 43560, 43562, 43601, 43604, 43606, 43648, 43650, 43656, 43658, 43669, 43680, 43682, 43688, 43690, | ||||||
|     }; |     }; | ||||||
|     static const uint16_t kgrid_2bit_1024[1024] = { |     static const uint16_t kgrid_2bit_1024[1024] = { | ||||||
|             0,     2,     5,     8,    10,    17,    20,    22,    25,    32,    34,    37,    40,    65,    68,    70, |             0,     2,     5,     8,    10,    17,    20,    22,    25,    32,    34,    37,    40,    65,    68,    70, | ||||||
| @@ -10169,7 +10198,7 @@ void iq2xs_init_impl(enum ggml_type type) { | |||||||
|     const int nwant = type == GGML_TYPE_IQ1_S ? 3 : type == GGML_TYPE_IQ2_S ? 1 : 2; |     const int nwant = type == GGML_TYPE_IQ1_S ? 3 : type == GGML_TYPE_IQ2_S ? 1 : 2; | ||||||
|     const uint16_t * kgrid = type == GGML_TYPE_IQ2_XXS ? kgrid_2bit_256 : |     const uint16_t * kgrid = type == GGML_TYPE_IQ2_XXS ? kgrid_2bit_256 : | ||||||
|                              type == GGML_TYPE_IQ2_XS  ? kgrid_2bit_512 : |                              type == GGML_TYPE_IQ2_XS  ? kgrid_2bit_512 : | ||||||
|                              type == GGML_TYPE_IQ1_S   ? kgrid_1bit_512 : kgrid_2bit_1024; |                              type == GGML_TYPE_IQ1_S   ? kgrid_1bit_2048 : kgrid_2bit_1024; | ||||||
|     uint64_t * kgrid_q2xs; |     uint64_t * kgrid_q2xs; | ||||||
|     int      * kmap_q2xs; |     int      * kmap_q2xs; | ||||||
|     uint16_t * kneighbors_q2xs; |     uint16_t * kneighbors_q2xs; | ||||||
| @@ -11408,12 +11437,70 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u | |||||||
|     return grid_index; |     return grid_index; | ||||||
| } | } | ||||||
|  |  | ||||||
// Selects the grid point that best represents a group of 8 values at a fixed scale.
//
// The score of a grid point is the weighted squared error
//     sum_i weight[i] * (scale*q_i - xval[i])^2,
// where q_i = (component_i - 3)/2 is the quant value decoded from the stored grid component.
//
//   neighbours - neighbours[0] is the number of candidates, neighbours[1..count] are grid indices
//   grid       - grid points; each uint64_t is read as 8 int8_t components
//   xval       - the 8 values being quantized
//   weight     - the 8 importance weights
//   scale      - scale applied to the decoded quant values (input only)
//   L          - output: 8 levels, computed as (component - 1)/2 of the chosen grid point
//   ngrid      - number of entries in grid; only used by the exhaustive fallback search
//
// Returns the index of the chosen grid point; asserts that one was found.
static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
        const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L, int ngrid) {
    int num_neighbors = neighbours[0];
    GGML_ASSERT(num_neighbors > 0);
    float best_score = FLT_MAX;
    int grid_index = -1;
    // First pass: only look at the pre-computed neighbour candidates.
    for (int j = 1; j <= num_neighbors; ++j) {
        const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
        float d2 = 0;
        for (int i = 0; i < 8; ++i) {
            float q = (pg[i] - 3)/2;
            float w = weight[i];
            float diff = scale*q - xval[i];
            d2 += w*diff*diff;
        }
        if (d2 < best_score) {
            best_score = d2;
            grid_index = neighbours[j];
        }
    }
    // Fallback: no candidate scored below FLT_MAX (e.g. every score was non-finite),
    // so search the whole grid.
    if (grid_index < 0) {
        for (int i = 0; i < ngrid; ++i) {
            const int8_t * grid_i = (const int8_t *)(grid + i);
            float d2 = 0;
            for (int j = 0; j < 8; ++j) {
                float w = weight[j];
                float q = (grid_i[j] - 3)/2;
                // j is the component index; indexing xval with the grid index i would read
                // past the 8-element group and compare against the wrong value.
                float diff = scale*q - xval[j];
                d2 += w*diff*diff;
            }
            if (d2 < best_score) {
                best_score = d2;
                grid_index = i;
            }
        }
    }
    // Still nothing: dump per-candidate diagnostics before the assertion below fires.
    if (grid_index < 0) {
        printf("Oops, did not find grid point\n");
        printf("Have %d neighbours\n", num_neighbors);
        for (int j = 1; j <= num_neighbors; ++j) {
            const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
            float sumqx = 0, sumq2 = 0;
            for (int i = 0; i < 8; ++i) {
                float q = (pg[i] - 3)/2;
                float w = weight[i];
                sumqx += w*q*xval[i];
                sumq2 += w*q*q;
            }
            printf("    neighbour %d: sumqx = %g sumq2 = %g\n", j, (double)sumqx, (double)sumq2);
        }
    }
    GGML_ASSERT(grid_index >= 0);
    const int8_t * pg = (const int8_t *)(grid + grid_index);
    for (int i = 0; i < 8; ++i) L[i] = (pg[i] - 1)/2;
    return grid_index;
}
|  |  | ||||||
// qsort comparator: orders elements ascending by the float stored at the start of each element.
// Returns -1, 1 or 0; unordered comparisons (NaN) fall through to 0.
static int iq1_sort_helper(const void * left, const void * right) {
    const float lhs = *(const float *)left;
    const float rhs = *(const float *)right;
    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
|  |  | ||||||
|  | #define IQ1S_BLOCK_SIZE 32 | ||||||
| static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) { | static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) { | ||||||
|  |  | ||||||
|     const int gindex = iq2_data_index(GGML_TYPE_IQ1_S); |     const int gindex = iq2_data_index(GGML_TYPE_IQ1_S); | ||||||
| @@ -11432,37 +11519,37 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy | |||||||
|  |  | ||||||
|     block_iq1_s * y = vy; |     block_iq1_s * y = vy; | ||||||
|  |  | ||||||
|     float  scales[QK_K/8]; |     float  scales[QK_K/IQ1S_BLOCK_SIZE]; | ||||||
|     float  weight[8]; |     float  weight[IQ1S_BLOCK_SIZE]; | ||||||
|     int8_t L[8]; |     int8_t L[IQ1S_BLOCK_SIZE]; | ||||||
|     float  sumx[9]; |     float  sumx[IQ1S_BLOCK_SIZE+1]; | ||||||
|     float  sumw[9]; |     float  sumw[IQ1S_BLOCK_SIZE+1]; | ||||||
|     float  pairs[16]; |     float  pairs[2*IQ1S_BLOCK_SIZE]; | ||||||
|     int * idx = (int *)(pairs + 1); |     int * idx = (int *)(pairs + 1); | ||||||
|     uint8_t hbit[QK_K/8]; |     uint16_t index[IQ1S_BLOCK_SIZE/8]; | ||||||
|  |  | ||||||
|     for (int ibl = 0; ibl < nbl; ++ibl) { |     for (int ibl = 0; ibl < nbl; ++ibl) { | ||||||
|  |  | ||||||
|         y[ibl].d = GGML_FP32_TO_FP16(0.f); |         y[ibl].d = GGML_FP32_TO_FP16(0.f); | ||||||
|         memset(y[ibl].qs, 0, QK_K/8); |         memset(y[ibl].qs, 0, QK_K/8); | ||||||
|         memset(y[ibl].scales, 0, QK_K/16); |         memset(y[ibl].qh, 0, QK_K/16); | ||||||
|  |  | ||||||
|         float max_scale = 0; |         float max_scale = 0; | ||||||
|  |  | ||||||
|         const float * xbl = x + QK_K*ibl; |         const float * xbl = x + QK_K*ibl; | ||||||
|         float sumx2 = 0; |         float sumx2 = 0; | ||||||
|         for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i]; |         for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i]; | ||||||
|         float sigma2 = sumx2/QK_K; |         float sigma2 = 2*sumx2/QK_K; | ||||||
|  |  | ||||||
|         for (int ib = 0; ib < QK_K/8; ++ib) { |         for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) { | ||||||
|             const float * xb = xbl + 8*ib; |             const float * xb = xbl + IQ1S_BLOCK_SIZE*ib; | ||||||
|             const float * qw = quant_weights + QK_K*ibl + 8*ib; |             const float * qw = quant_weights + QK_K*ibl + IQ1S_BLOCK_SIZE*ib; | ||||||
|             for (int i = 0; i < 8; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]); |             for (int i = 0; i < IQ1S_BLOCK_SIZE; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]); | ||||||
|             float max = fabsf(xb[0]); |             float max = fabsf(xb[0]); | ||||||
|             for (int i = 1; i < 8; ++i) max = MAX(max, fabsf(xb[i])); |             for (int i = 1; i < IQ1S_BLOCK_SIZE; ++i) max = MAX(max, fabsf(xb[i])); | ||||||
|             if (!max) { |             if (!max) { | ||||||
|                 scales[ib] = 0; |                 scales[ib] = 0; | ||||||
|                 memset(L, 1, 8); |                 memset(L, 1, IQ1S_BLOCK_SIZE); | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem. |             // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem. | ||||||
| @@ -11471,14 +11558,14 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy | |||||||
|             // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and |             // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and | ||||||
|             // Wi = sum[weight[j], j = 0...i], and use these to quickly get the optimum scale |             // Wi = sum[weight[j], j = 0...i], and use these to quickly get the optimum scale | | | | | | |
|             // and score for each possible split. |             // and score for each possible split. | | | | | | |
|             for (int j = 0; j < 8; ++j) { |             for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) { | ||||||
|                 pairs[2*j] = xb[j]; |                 pairs[2*j] = xb[j]; | ||||||
|                 idx[2*j] = j; |                 idx[2*j] = j; | ||||||
|             } |             } | ||||||
|             qsort(pairs, 8, 2*sizeof(float), iq1_sort_helper); |             qsort(pairs, IQ1S_BLOCK_SIZE, 2*sizeof(float), iq1_sort_helper); | ||||||
|             { |             { | ||||||
|                 sumx[0] = sumw[0] = 0; |                 sumx[0] = sumw[0] = 0; | ||||||
|                 for (int j = 0; j < 8; ++j) { |                 for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) { | ||||||
|                     int i = idx[2*j]; |                     int i = idx[2*j]; | ||||||
|                     sumx[j+1] = sumx[j] + weight[i]*xb[i]; |                     sumx[j+1] = sumx[j] + weight[i]*xb[i]; | ||||||
|                     sumw[j+1] = sumw[j] + weight[i]; |                     sumw[j+1] = sumw[j] + weight[i]; | ||||||
| @@ -11486,10 +11573,10 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy | |||||||
|             } |             } | ||||||
|             float best_score = 0, scale = max; |             float best_score = 0, scale = max; | ||||||
|             int besti1 = 0, besti2 = 0; |             int besti1 = 0, besti2 = 0; | ||||||
|             for (int i1 = 0; i1 <= 8; ++i1) { |             for (int i1 = 0; i1 <= IQ1S_BLOCK_SIZE; ++i1) { | ||||||
|                 for (int i2 = i1; i2 <= 8; ++i2) { |                 for (int i2 = i1; i2 <= IQ1S_BLOCK_SIZE; ++i2) { | ||||||
|                     float sumqx = -(sumx[i1] - sumx[0]) + (sumx[8] - sumx[i2]); |                     float sumqx = -(sumx[i1] - sumx[0]) + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2]); | ||||||
|                     float sumq2 =  (sumw[i1] - sumw[0]) + (sumw[8] - sumw[i2]); |                     float sumq2 =  (sumw[i1] - sumw[0]) + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2]); | ||||||
|                     if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) { |                     if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) { | ||||||
|                         scale = sumqx/sumq2; best_score = scale*sumqx; |                         scale = sumqx/sumq2; best_score = scale*sumqx; | ||||||
|                         besti1 = i1; besti2 = i2; |                         besti1 = i1; besti2 = i2; | ||||||
| @@ -11498,23 +11585,43 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy | |||||||
|             } |             } | ||||||
|             for (int j =      0; j < besti1; ++j) L[idx[2*j]] = 0; |             for (int j =      0; j < besti1; ++j) L[idx[2*j]] = 0; | ||||||
|             for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1; |             for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1; | ||||||
|             for (int j = besti2; j <      8; ++j) L[idx[2*j]] = 2; |             for (int j = besti2; j < IQ1S_BLOCK_SIZE; ++j) L[idx[2*j]] = 2; | ||||||
|             if (scale < 0) { |             if (scale < 0) { | ||||||
|                 for (int j = 0; j < 8; ++j) L[j] = 2 - L[j]; |                 for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) L[j] = 2 - L[j]; | ||||||
|                 scale = -scale; |                 scale = -scale; | ||||||
|             } |             } | ||||||
|             // Now we check if the solution found above corresponds to a grid point and, if not, use a neighbouring |             bool all_on_grid = true; | ||||||
|             // grid point that minimizes SSD. |             for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) { | ||||||
|                 uint16_t u = 0; |                 uint16_t u = 0; | ||||||
|             for (int j = 0; j < 8; ++j) u |= (L[j] << 2*j); |                 for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j); | ||||||
|                 int grid_index = kmap_q2xs[u]; |                 int grid_index = kmap_q2xs[u]; | ||||||
|                 if (grid_index < 0) { |                 if (grid_index < 0) { | ||||||
|  |                     all_on_grid = false; | ||||||
|                     const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1; |                     const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1; | ||||||
|                 grid_index = iq1_find_best_neighbour(neighbours, kgrid_q2xs, xb, weight, &scale, L, NGRID_IQ2XXS); |                     grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, L + 8*k, NGRID_IQ1S); | ||||||
|                     GGML_ASSERT(grid_index >= 0); |                     GGML_ASSERT(grid_index >= 0); | ||||||
|                 } |                 } | ||||||
|             y[ibl].qs[ib] = grid_index & 255; |                 index[k] = grid_index; | ||||||
|             hbit[ib] = grid_index >> 8; |             } | ||||||
|  |             if (!all_on_grid) { | ||||||
|  |                 float sumqx = 0, sumq2 = 0; | ||||||
|  |                 for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) { | ||||||
|  |                     const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]); | ||||||
|  |                     for (int j = 0; j < 8; ++j) { | ||||||
|  |                         float w = weight[8*k + j]; | ||||||
|  |                         float q = (pg[j] - 3)/2; | ||||||
|  |                         sumqx += w*q*xb[8*k+j]; | ||||||
|  |                         sumq2 += w*q*q; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2; | ||||||
|  |             } | ||||||
|  |             uint16_t h = 0; | ||||||
|  |             for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) { | ||||||
|  |                 y[ibl].qs[(IQ1S_BLOCK_SIZE/8)*ib + k] = index[k] & 255; | ||||||
|  |                 h |= (index[k] >> 8) << 3*k; | ||||||
|  |             } | ||||||
|  |             y[ibl].qh[ib] = h; | ||||||
|             GGML_ASSERT(scale >= 0); |             GGML_ASSERT(scale >= 0); | ||||||
|             scales[ib] = scale; |             scales[ib] = scale; | ||||||
|             max_scale = MAX(max_scale, scale); |             max_scale = MAX(max_scale, scale); | ||||||
| @@ -11525,14 +11632,13 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy | |||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         float d = max_scale/15; |         float d = max_scale/31; | ||||||
|         y[ibl].d = GGML_FP32_TO_FP16(d*1.085f); // 1.085f is another fudge factor. Don't ask me why it is needed. |         y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed. | | | | | | |
|         float id = 1/d; |         float id = 1/d; | ||||||
|         for (int ib = 0; ib < QK_K/8; ++ib) { |         for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) { | ||||||
|             int l = nearest_int(0.5f*(id*scales[ib]-1)); |             int l = nearest_int(0.5f*(id*scales[ib]-1)); | ||||||
|             l = MAX(0, MIN(7, l)); |             l = MAX(0, MIN(15, l)); | ||||||
|             if (hbit[ib]) l |= 8; |             y[ibl].qh[ib] |= (l << 12); | ||||||
|             y[ibl].scales[ib/2] |= (l << 4*(ib%2)); |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -218,7 +218,7 @@ static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N | |||||||
/*
 * block_iq1_s: one quantized super-block of QK_K weights for IQ1_S.
 * The right-hand column shows the layout after this change:
 *   d  - fp16 super-block scale.
 *   qs - low 8 bits of a grid index, one byte per group of 8 weights.
 *   qh - one 16-bit word per block of 32 weights: the quantizer stores the bits of the four
 *        grid indices above the low byte in 3-bit slots (bits 0..11) and the block scale
 *        level shifted left by 12 (bits 12..15).
 * uint16_t qh[QK_K/32] occupies QK_K/16 bytes, the same as the old scales[] array, so the
 * size checked by the static_assert is unchanged.
 */
| typedef struct { | typedef struct { | | | | | | |
|     ggml_fp16_t d; |     ggml_fp16_t d; | | | | | | |
|     uint8_t  qs[QK_K/8]; |     uint8_t  qs[QK_K/8]; | | | | | | |
|     uint8_t scales[QK_K/16]; |     uint16_t qh[QK_K/32]; | | | | | | |
| } block_iq1_s; | } block_iq1_s; | | | | | | |
| static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); | static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); | | | | | | |
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Kawrakow
					Kawrakow