Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ option(BITNET_ARM_TL1 "bitnet.cpp: use tl1 on arm platform" OFF)
option(BITNET_X86_TL2 "bitnet.cpp: use tl2 on x86 platform" OFF)


set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
Expand Down
1,460 changes: 1,460 additions & 0 deletions include/bitnet-lut-kernels.h

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions include/kernel_config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[Kernels_0]
m = 5120
k = 5120
bm = 256
bk = 96
bmm = 32

[Kernels_1]
m = 1024
k = 5120
bm = 256
bk = 96
bmm = 32

[Kernels_2]
m = 27648
k = 5120
bm = 256
bk = 96
bmm = 32

[Kernels_3]
m = 5120
k = 27648
bm = 128
bk = 96
bmm = 32

2 changes: 1 addition & 1 deletion src/ggml-bitnet-mad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ void ggml_vec_dot_i2_i8_s_Nx1(int n, float * s, size_t bs, const void * vx, size
accu[iy] = _mm256_setzero_si256();
}

int8_t * y_col = y + col * by;
const int8_t * y_col = y + col * by;

for (int i = 0; i < group32_num; i++) {
const uint8_t *px = x + i * 1024;
Expand Down
6 changes: 5 additions & 1 deletion utils/codegen_tl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,11 @@ def get_three_k_two_k(K, bk):
"Llama3-8B-1.58-100B-tokens" : [[14336, 4096],
[4096, 14336],
[1024, 4096],
[4096, 4096]]
[4096, 4096]],
"Qwen2.5-32B-TL2" : [[5120, 5120],
[1024, 5120],
[27648, 5120],
[5120, 27648]]
}

parser = argparse.ArgumentParser(description='gen impl')
Expand Down
103 changes: 103 additions & 0 deletions windows-tl2-build-fixes.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
diff --git a/common/common.cpp b/common/common.cpp
index 451307b5..20542501 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -16,6 +16,7 @@
#include <cmath>
#include <codecvt>
#include <cstdarg>
+#include <chrono>
#include <cstring>
#include <ctime>
#include <fstream>
diff --git a/common/log.cpp b/common/log.cpp
index 04c7c0ed..aea209fc 100644
--- a/common/log.cpp
+++ b/common/log.cpp
@@ -1,5 +1,6 @@
#include "log.h"

+#include <chrono>
#include <condition_variable>
#include <cstdarg>
#include <cstdio>
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index d1ff3e8b..bb4f8d9a 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -3,6 +3,7 @@
#include "log.h"
#include "llama.h"

+#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstring>
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index efb41b80..2f821e0c 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <atomic>
+#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstring>
diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt
index 1b425ede..dad3dfff 100644
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -186,7 +186,7 @@ endif()
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)

-if (GGML_SYCL)
+if (GGML_SYCL OR GGML_BITNET_X86_TL2)
set(CMAKE_CXX_STANDARD 17)
else()
set(CMAKE_CXX_STANDARD 11)
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 121f72da..bad7f8d4 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -12686,7 +12686,7 @@ static void ggml_compute_forward_mul_mat(

struct bitnet_tensor_extra * wt = src0->extra;
char * cur_wdata = wdata;
- bitnet_float_type * bitnet_f_ptr = wdata;
+ bitnet_float_type * bitnet_f_ptr = (bitnet_float_type *) wdata;
if (sizeof(bitnet_float_type) == 2) {
cur_wdata = wdata + MAX(ne10, ne01) * ne11 * sizeof(bitnet_float_type);
};
@@ -12705,7 +12705,7 @@ static void ggml_compute_forward_mul_mat(
GGML_ASSERT(src1->type == GGML_TYPE_F32);
bitnet_float_type * act_input;
if (sizeof(bitnet_float_type) == 2) {
- ggml_fp32_to_fp16_row(src1->data, bitnet_f_ptr, ne10 * ne11);
+ ggml_fp32_to_fp16_row(src1->data, (ggml_fp16_t *)bitnet_f_ptr, ne10 * ne11);
act_input = bitnet_f_ptr;
} else {
act_input = src1->data;
@@ -12760,9 +12760,9 @@ static void ggml_compute_forward_mul_mat(
// src1: activation, ne10 = k, ne11 = m
char * wdata = params->wdata;

- struct bitnet_tensor_extra * wt = src0->extra;
+ struct bitnet_tensor_extra * wt = (struct bitnet_tensor_extra *) src0->extra;
char * cur_wdata = wdata;
- bitnet_float_type * bitnet_f_ptr = wdata;
+ bitnet_float_type * bitnet_f_ptr = (bitnet_float_type *) wdata;
if (sizeof(bitnet_float_type) == 2) {
cur_wdata = wdata + MAX(ne10, ne01) * ne11 * sizeof(bitnet_float_type);
};
@@ -12787,7 +12787,7 @@ static void ggml_compute_forward_mul_mat(
GGML_ASSERT(src1->type == GGML_TYPE_F32);
bitnet_float_type * act_input;
if (sizeof(bitnet_float_type) == 2) {
- ggml_fp32_to_fp16_row(src1->data, bitnet_f_ptr, ne10 * ne11);
+ ggml_fp32_to_fp16_row(src1->data, (ggml_fp16_t *)bitnet_f_ptr, ne10 * ne11);
act_input = bitnet_f_ptr;
} else {
act_input = src1->data;