|
| 1 | +/* |
| 2 | +** 2024-07-04 |
| 3 | +** |
| 4 | +** Copyright 2024 the libSQL authors |
| 5 | +** |
| 6 | +** Permission is hereby granted, free of charge, to any person obtaining a copy of |
| 7 | +** this software and associated documentation files (the "Software"), to deal in |
| 8 | +** the Software without restriction, including without limitation the rights to |
| 9 | +** use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |
| 10 | +** the Software, and to permit persons to whom the Software is furnished to do so, |
| 11 | +** subject to the following conditions: |
| 12 | +** |
| 13 | +** The above copyright notice and this permission notice shall be included in all |
| 14 | +** copies or substantial portions of the Software. |
| 15 | +** |
| 16 | +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | +** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
| 18 | +** FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
| 19 | +** COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
| 20 | +** IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 21 | +** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 22 | +** |
| 23 | +****************************************************************************** |
| 24 | +** |
| 25 | +** 16-bit (FLOAT16) floating point vector format utilities. |
| 26 | +*/ |
| 27 | +#ifndef SQLITE_OMIT_VECTOR |
| 28 | +#include "sqliteInt.h" |
| 29 | + |
| 30 | +#include "vectorInt.h" |
| 31 | + |
| 32 | +#include <math.h> |
| 33 | + |
| 34 | +/************************************************************************** |
| 35 | +** Utility routines for vector serialization and deserialization |
| 36 | +**************************************************************************/ |
| 37 | + |
| 38 | +// f32: [fffffffffffffffffffffffeeeeeeees] |
| 39 | +// 01234567890123456789012345678901 |
| 40 | +// f16: [ffffffffffeeeees] |
| 41 | +// 0123456789012345 |
| 42 | + |
/*
** Expand a 16-bit floating point value, given as its raw bit pattern
** (1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits), into a float.
** The result is assembled as a 32-bit pattern with an 8-bit exponent (bias 127)
** and a 23-bit mantissa, so this assumes float uses that layout.
**
**   - exponent 0x1f: Infinity if mantissa is zero, otherwise NaN (only the
**     top mantissa bit is set in the result, the f16 payload is not kept)
**   - zero keeps its sign
**   - f16 denormalized values are renormalized into normal f32 values
*/
static float vectorF16ToFloat(u16 f16){
  u32 f32;
  float result;
  // sgn: [0000000000000000000000000000000s]
  u32 sgn = ((u32)f16 & 0x8000) << 16;

  int expBits = (f16 >> 10) & 0x1f;
  int exp = expBits - 15; // 15 is exp bias for f16

  u32 mnt = ((u32)f16 & 0x3ff);
  u32 mntNonZero = !!mnt;

  if( exp == 16 ){ // NaN or +/- Infinity
    exp = 128;
    mnt = mntNonZero << 22; // set mnt high bit to represent NaN if it was NaN in f16
  }else if( exp == -15 && mnt == 0 ){ // zero
    exp = -127; // biased exponent becomes 0, mnt is already 0
  }else if( exp == -15 ){ // denormalized value
    // shift mantissa until we get 1 as a high bit (bit 10, the implicit one)
    exp++;
    while( (mnt & 0x400) == 0 ){
      mnt <<= 1;
      exp--;
    }
    // then reset high bit as this will be normal value (not denormalized) in f32
    mnt &= 0x3ff;
    mnt <<= 13;
  }else{
    // normal value: widen 10-bit mantissa to 23 bits
    mnt <<= 13;
  }
  f32 = sgn | ((u32)(exp + 127) << 23) | mnt;
  // copy the bits instead of dereferencing a casted pointer: reading a u32
  // object through a float lvalue is undefined behavior (strict aliasing)
  memcpy(&result, &f32, sizeof(result));
  return result;
}
| 74 | + |
/*
** Narrow a float to a 16-bit floating point bit pattern (1 sign bit,
** 5 exponent bits with bias 15, 10 mantissa bits), rounding to nearest with
** ties to even. The float is read as a 32-bit pattern with an 8-bit exponent
** (bias 127) and a 23-bit mantissa, so this assumes float uses that layout.
**
**   - NaN maps to a NaN with only the top mantissa bit set, Infinity stays
**     Infinity
**   - magnitudes too large for f16 (including finite values that round up
**     past the largest f16) become Infinity
**   - magnitudes below the f16 normal range become f16 denormalized values,
**     or zero when they are too small even for that
*/
static u16 vectorF16FromFloat(float f){
  u32 i;
  u32 sgn, mntBits, mntNonZero, mnt;
  int expBits, exp;

  // copy the bits instead of dereferencing a casted pointer: reading a float
  // object through a u32 lvalue is undefined behavior (strict aliasing)
  memcpy(&i, &f, sizeof(i));

  // sgn: [000000000000000s]
  sgn = (i >> 16) & (0x8000);

  // expBits: [eeeeeeee]
  expBits = (i >> 23) & (0xff);
  exp = expBits - 127; // 127 is exp bias for f32

  // mntBits: [fffffffffffffffffffffff]
  mntBits = (i & 0x7fffff);
  mntNonZero = !!mntBits;

  if( exp == 128 ){ // NaN or +/- Infinity
    exp = 16;
    mntBits = mntNonZero << 22; // set mnt high bit to represent NaN if it was NaN in f32
  }else if( exp > 15 ){ // just too big numbers for f16
    exp = 16;
    mntBits = 0;
  }else if( exp < -14 && exp >= -25 ){ // small value, but it can be represented as denormalized f16
    // set high bit to 1 as normally mantissa has form 1.[mnt] but denormalized mantissa has form 0.[mnt]
    int shift = -exp - 14; // in range 1..11
    u32 full = mntBits | 0x800000;
    // remember whether any non-zero bit is shifted out: without this "sticky"
    // bit a value slightly above a rounding tie would be treated as an exact
    // tie and could be rounded down instead of up
    u32 sticky = (full & ((1u << shift) - 1)) != 0;
    mntBits = (full >> shift) | sticky;
    exp = -15;
  }else if( exp < -25 ){ // very small or denormalized f32 value: rounds to zero
    exp = -15;
    mntBits = 0;
  }
  // round to nearest, ties to even: the low 13 bits are discarded, round up
  // when they are above the half-way point, or exactly half-way and the kept
  // low bit is odd
  if( (mntBits & 0x1fff) > (0x1000 - ((mntBits >> 13) & 1)) ){
    mntBits += 0x2000;
  }
  mnt = mntBits >> 13;

  // handle overflow here (note, that overflow can happen only if exp < 16):
  // if rounding carried into bit 10, bump the exponent and clear the mantissa
  return (u16)(sgn | ((u32)(exp + 15 + (mnt >> 10)) << 10) | (mnt & 0x3ff));
}
| 110 | + |
| 111 | +void vectorF16Dump(const Vector *pVec){ |
| 112 | + u16 *elems = pVec->data; |
| 113 | + unsigned i; |
| 114 | + |
| 115 | + assert( pVec->type == VECTOR_TYPE_FLOAT16 ); |
| 116 | + |
| 117 | + printf("f16: ["); |
| 118 | + for(i = 0; i < pVec->dims; i++){ |
| 119 | + printf("%s%f", i == 0 ? "" : ", ", vectorF16ToFloat(elems[i])); |
| 120 | + } |
| 121 | + printf("]\n"); |
| 122 | +} |
| 123 | + |
| 124 | +void vectorF16SerializeToBlob( |
| 125 | + const Vector *pVector, |
| 126 | + unsigned char *pBlob, |
| 127 | + size_t nBlobSize |
| 128 | +){ |
| 129 | + float alpha, shift; |
| 130 | + |
| 131 | + assert( pVector->type == VECTOR_TYPE_FLOAT16 ); |
| 132 | + assert( pVector->dims <= MAX_VECTOR_SZ ); |
| 133 | + assert( nBlobSize >= vectorDataSize(pVector->type, pVector->dims) ); |
| 134 | + |
| 135 | + memcpy(pBlob, pVector->data, pVector->dims * sizeof(u16)); |
| 136 | +} |
| 137 | + |
| 138 | +float vectorF16DistanceCos(const Vector *v1, const Vector *v2){ |
| 139 | + int i; |
| 140 | + float dot = 0, norm1 = 0, norm2 = 0; |
| 141 | + float value1, value2; |
| 142 | + u16 *data1 = v1->data, *data2 = v2->data; |
| 143 | + |
| 144 | + assert( v1->dims == v2->dims ); |
| 145 | + assert( v1->type == VECTOR_TYPE_FLOAT16 ); |
| 146 | + assert( v2->type == VECTOR_TYPE_FLOAT16 ); |
| 147 | + |
| 148 | + for(i = 0; i < v1->dims; i++){ |
| 149 | + value1 = vectorF16ToFloat(data1[i]); |
| 150 | + value2 = vectorF16ToFloat(data2[i]); |
| 151 | + dot += value1*value2; |
| 152 | + norm1 += value1*value1; |
| 153 | + norm2 += value2*value2; |
| 154 | + } |
| 155 | + |
| 156 | + return 1.0 - (dot / sqrt(norm1 * norm2)); |
| 157 | +} |
| 158 | + |
| 159 | +float vectorF16DistanceL2(const Vector *v1, const Vector *v2){ |
| 160 | + int i; |
| 161 | + float sum = 0; |
| 162 | + float value1, value2; |
| 163 | + u8 *data1 = v1->data, *data2 = v2->data; |
| 164 | + |
| 165 | + assert( v1->dims == v2->dims ); |
| 166 | + assert( v1->type == VECTOR_TYPE_FLOAT16 ); |
| 167 | + assert( v2->type == VECTOR_TYPE_FLOAT16 ); |
| 168 | + |
| 169 | + for(i = 0; i < v1->dims; i++){ |
| 170 | + value1 = vectorF16ToFloat(data1[i]); |
| 171 | + value2 = vectorF16ToFloat(data2[i]); |
| 172 | + float d = (value1 - value2); |
| 173 | + sum += d*d; |
| 174 | + } |
| 175 | + return sqrt(sum); |
| 176 | +} |
| 177 | + |
| 178 | +void vectorF16DeserializeFromBlob( |
| 179 | + Vector *pVector, |
| 180 | + const unsigned char *pBlob, |
| 181 | + size_t nBlobSize |
| 182 | +){ |
| 183 | + assert( pVector->type == VECTOR_TYPE_FLOAT16 ); |
| 184 | + assert( 0 <= pVector->dims && pVector->dims <= MAX_VECTOR_SZ ); |
| 185 | + assert( nBlobSize >= vectorDataSize(pVector->type, pVector->dims) ); |
| 186 | + |
| 187 | + memcpy((u8*)pVector->data, (u8*)pBlob, pVector->dims * sizeof(u16)); |
| 188 | +} |
| 189 | + |
| 190 | +#endif /* !defined(SQLITE_OMIT_VECTOR) */ |
0 commit comments