ggerganov
diff --git a/Makefile b/Makefile
Lines changed: 1 addition & 1 deletion
diff --git a/Sources/whisper/ggml-alloc.c b/Sources/whisper/ggml-alloc.c
Lines changed: 12 additions & 11 deletions
diff --git a/Sources/whisper/ggml-metal.h b/Sources/whisper/ggml-metal.h
Lines changed: 7 additions & 0 deletions
diff --git a/Sources/whisper/ggml-metal.m b/Sources/whisper/ggml-metal.m
Lines changed: 11 additions & 1 deletion
diff --git a/Sources/whisper/ggml-quants.c b/Sources/whisper/ggml-quants.c
Lines changed: 5 additions & 0 deletions
@@ -42,7 +42,7 @@ publish: publish-trigger
 		\n\
 		cd /path/to/whisper.cpp/bindings/ios\n\
 		git commit\n\
-		git tag 1.5.0\n\
+		git tag 1.5.1\n\
 		git push origin master --tags\n\
 		"
 
 
@@ -446,12 +446,14 @@ static ggml_tallocr_t node_tallocr(ggml_gallocr_t galloc, struct ggml_tensor * n
     return galloc->hash_allocs[ggml_hash_find_or_insert(galloc->hash_set, node)];
 }
 
-static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view) {
+static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view, bool update_backend) {
     ggml_tallocr_t alloc = node_tallocr(galloc, view);
 
     //printf("init_view: %s from src %s\n", view->name, view->view_src->name);
     GGML_ASSERT(view->view_src != NULL && view->view_src->data != NULL);
-    view->backend = view->view_src->backend;
+    if (update_backend) {
+        view->backend = view->view_src->backend;
+    }
     view->buffer  = view->view_src->buffer;
     view->data    = (char *)view->view_src->data + view->view_offs;
 
@@ -469,7 +471,7 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
 
     if (node->data == NULL) {
         if (ggml_is_view(node)) {
-            init_view(galloc, node);
+            init_view(galloc, node, true);
         } else {
             // see if we can reuse a parent's buffer (inplace)
             if (ggml_op_can_inplace(node->op)) {
@@ -499,15 +501,14 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
                                 AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
                                 node->view_src = view_src;
                                 view_src_hn->n_views += 1;
-                                init_view(galloc, node);
+                                init_view(galloc, node, false);
                                 return;
                             }
-                        }
-                        else {
+                        } else {
                             AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
                             node->view_src = parent;
                             p_hn->n_views += 1;
-                            init_view(galloc, node);
+                            init_view(galloc, node, false);
                             return;
                         }
                     }
@@ -537,7 +538,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
             hash_get(galloc, view_src)->n_views += 1;
             if (node->buffer == NULL && node->data != NULL) {
                 // view of a pre-allocated tensor, didn't call init_view() yet
-                init_view(galloc, node);
+                init_view(galloc, node, true);
             }
         }
 
@@ -548,7 +549,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
             }
             hash_get(galloc, parent)->n_children += 1;
             if (ggml_is_view(parent) && parent->buffer == NULL && parent->data != NULL) {
-                init_view(galloc, parent);
+                init_view(galloc, parent, true);
             }
         }
    }
@@ -663,7 +664,7 @@ size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, st
     return max_size;
 }
 
-void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_alloct) {
+void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_talloc) {
     const size_t hash_size = hash_set.size;
 
     GGML_ASSERT(hash_size >= (size_t)(graph->n_nodes + graph->n_leafs));
@@ -686,7 +687,7 @@ void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * grap
     // reset hash values
     memset(galloc->hash_values, 0, sizeof(struct hash_node) * hash_size);
 
-    galloc->hash_allocs = hash_node_alloct;
+    galloc->hash_allocs = hash_node_talloc;
 
     ggml_tallocr_alloc_graph_impl(galloc, graph);
 
 
@@ -52,6 +52,11 @@ void ggml_metal_free(struct ggml_metal_context * ctx);
 void * ggml_metal_host_malloc(size_t n);
 void   ggml_metal_host_free  (void * data);
 
+// helper to check if the device supports a specific family
+// ideally, the user code should be doing these checks
+// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+bool ggml_metal_supports_family(struct ggml_metal_context * ctx, int family);
+
 // set the number of command buffers to use
 void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
 
@@ -100,6 +105,8 @@ GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
 
 GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
 
+GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
+
 #ifdef __cplusplus
 }
 #endif
 
@@ -459,6 +459,10 @@ void ggml_metal_host_free(void * data) {
     free(data);
 }
 
+bool ggml_metal_supports_family(struct ggml_metal_context * ctx, int family) { // returns the result of asking ctx->device whether it supports the requested GPU family
+    return [ctx->device supportsFamily:(MTLGPUFamilyApple1 + family - 1)]; // family is 1-based: family == 1 selects MTLGPUFamilyApple1; no range check is done here
+}
+
 void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
     ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
 }
@@ -1072,7 +1076,7 @@ void ggml_metal_graph_compute(
                             GGML_ASSERT(ne00 == ne10);
                             GGML_ASSERT(ne03 == ne13);
 
-                            const uint gqa = ne12/ne02;
+                            const unsigned int gqa = ne12/ne02;
 
                             // find the break-even point where the matrix-matrix kernel becomes more efficient compared
                             // to the matrix-vector kernel
@@ -1751,3 +1755,9 @@ void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
 
     ggml_metal_set_n_cb(ctx, n_cb);
 }
+
+bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family) { // backend-level wrapper: forwards to ggml_metal_supports_family() with the backend's context
+    struct ggml_metal_context * ctx = (struct ggml_metal_context *)backend->context; // NOTE(review): unchecked cast - assumes backend is a Metal backend; confirm callers guarantee this
+
+    return ggml_metal_supports_family(ctx, family);
+}
@@ -1368,7 +1368,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
     float max = x[0];
     float sum_w = weights[0];
     float sum_x = sum_w * x[0];
+#ifdef HAVE_BUGGY_APPLE_LINKER
+    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
+    for (volatile int i = 1; i < n; ++i) {
+#else
     for (int i = 1; i < n; ++i) {
+#endif
         if (x[i] < min) min = x[i];
         if (x[i] > max) max = x[i];
         float w = weights[i];
Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ publish: publish-trigger`
`42`	`42`	`\n\`
`43`	`43`	`cd /path/to/whisper.cpp/bindings/ios\n\`
`44`	`44`	`git commit\n\`
`45`		`- git tag 1.5.0\n\`
	`45`	`+ git tag 1.5.1\n\`
`46`	`46`	`git push origin master --tags\n\`
`47`	`47`	`"`
`48`	`48`