model, onnx wasm runner and embeddings

a119e9da · Konrad Völkel · d5f3baba · a119e9da · a119e9da · a119e9da
Commit a119e9da authored 4 months ago by Konrad Völkel
--- a/embeddings/largeEmbedding.json
+++ b/embeddings/largeEmbedding.json
--- a/embeddings/smallEmbedding.json
+++ b/embeddings/smallEmbedding.json
--- a/models/Xenova/all-MiniLM-L6-v2/README.md
+++ b/models/Xenova/all-MiniLM-L6-v2/README.md
+---
+base_model: sentence-transformers/all-MiniLM-L6-v2
+library_name: transformers.js
+license: apache-2.0
+---
+
+This is [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) with ONNX weights, packaged to be compatible with Transformers.js.
+
+## Usage (Transformers.js)
+
+If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
+```bash
+npm i @huggingface/transformers
+```
+
+You can then use the model to compute embeddings like this:
+
+```js
+import { pipeline } from '@huggingface/transformers';
+
+// Create a feature-extraction pipeline
+const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
+
+// Compute sentence embeddings
+const sentences = ['This is an example sentence', 'Each sentence is converted'];
+const output = await extractor(sentences, { pooling: 'mean', normalize: true });
+console.log(output);
+// Tensor {
+//   dims: [ 2, 384 ],
+//   type: 'float32',
+//   data: Float32Array(768) [ 0.04592696577310562, 0.07328180968761444, ... ],
+//   size: 768
+// }
+```
+
+You can convert this Tensor to a nested JavaScript array using `.tolist()`:
+```js
+console.log(output.tolist());
+// [
+//   [ 0.04592696577310562, 0.07328180968761444, 0.05400655046105385, ... ],
+//   [ 0.08188057690858841, 0.10760223120450974, -0.013241755776107311, ... ]
+// ]
+```
+
+
+Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
\ No newline at end of file
--- a/models/Xenova/all-MiniLM-L6-v2/config.json
+++ b/models/Xenova/all-MiniLM-L6-v2/config.json
+{
+  "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.29.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
--- a/models/Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx
+++ b/models/Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx
--- a/models/Xenova/all-MiniLM-L6-v2/special_tokens_map.json
+++ b/models/Xenova/all-MiniLM-L6-v2/special_tokens_map.json
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
--- a/models/Xenova/all-MiniLM-L6-v2/tokenizer.json
+++ b/models/Xenova/all-MiniLM-L6-v2/tokenizer.json
--- a/models/Xenova/all-MiniLM-L6-v2/tokenizer_config.json
+++ b/models/Xenova/all-MiniLM-L6-v2/tokenizer_config.json
+{
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
--- a/models/Xenova/all-MiniLM-L6-v2/vocab.txt
+++ b/models/Xenova/all-MiniLM-L6-v2/vocab.txt
--- a/wasm/ort-wasm-simd.wasm
+++ b/wasm/ort-wasm-simd.wasm