Skip to content
Snippets Groups Projects
Commit 85a641fb authored by Konrad Völkel's avatar Konrad Völkel
Browse files

modules

parent 60be3d75
Branches
No related tags found
No related merge requests found
import { convertToFloat32 } from './faqData.js';
/**
 * Fetch a JSON file containing embeddings and convert it with convertToFloat32.
 * This is a best-effort loader: any failure yields null instead of an exception.
 * @param {string} filePath - location passed to fetch()
 * @returns {Promise<Object|null>} converted embeddings, or null when the response
 *   is not ok or when fetching, parsing or converting throws
 */
async function loadJsonData(filePath) {
  try {
    const res = await fetch(filePath);
    if (res.ok) {
      const rawText = await res.text();
      return convertToFloat32(JSON.parse(rawText));
    }
  } catch {
    /** deliberately ignored; the caller treats null as "no file data available" */
  }
  return null;
}
/**
 * Load embeddings for a storage key. A non-empty object stored in localStorage
 * is preferred; otherwise the JSON file is tried.
 * @param {string} key - localStorage key holding the serialized embeddings
 * @param {string} filePath - JSON file used as fallback
 * @returns {Promise<Object>} embeddings converted by convertToFloat32, or an
 *   empty object when neither source provides data
 */
export async function loadEmbedding(key, filePath) {
  let retrievedEmb = null;
  try {
    retrievedEmb = JSON.parse(localStorage.getItem(key));
  } catch (err) {
    /** a corrupted entry or unavailable storage must not block the file fallback */
    console.warn(`Ignoring unreadable localStorage entry "${key}"`, err);
  }
  const isUsable = retrievedEmb !== null
    && typeof retrievedEmb === 'object'
    && Object.keys(retrievedEmb).length > 0;
  if (isUsable) {
    return convertToFloat32(retrievedEmb);
  }
  return (await loadJsonData(filePath)) || {};
}
/**
 * Delete every own enumerable property of `stack` whose name is not in `validKeys`.
 * The object is modified in place.
 * @param {Object} stack - object to prune
 * @param {Set<string>} validKeys - names to keep (anything exposing has())
 */
export function removeOutdatedEntries(stack, validKeys) {
  for (const name of Object.keys(stack)) {
    if (validKeys.has(name)) continue;
    delete stack[name];
  }
}
import { tokenizeText } from './semanticSearch.js';
/**
 * Download an FAQ page and extract question/answer pairs from its accordion markup.
 * The ids of the accordion group and of the question button are kept so that a
 * chat response can later be located and highlighted on the page.
 * @param {string} url - address of the FAQ page
 * @returns {Promise<Array<Object>>} one entry per panel that has both a button and
 *   a `.panel-body`: { akkordeonId, buttonId, question, answer, qPlusA, tokens }
 */
export async function extractFAQfromHTML(url) {
  const response = await fetch(url);
  const htmlText = await response.text();
  const doc = new DOMParser().parseFromString(htmlText, "text/html");
  const result = [];
  doc.querySelectorAll('.panel-group').forEach(group => {
    const akkordeonId = group.id;
    group.querySelectorAll('.panel').forEach(panel => {
      const questionButton = panel.querySelector('button');
      const panelBody = panel.querySelector('.panel-body');
      if (!questionButton || !panelBody) {
        return;
      }
      /** extracting the relevant parts; a missing node falls back to "" so that */
      /** neither field is undefined and qPlusA never contains the text "undefined" */
      const questionText = questionButton.querySelector('.text-box')?.textContent.trim() || "";
      const answerText = panelBody.querySelector('.ce-bodytext')?.textContent.trim() || "";
      result.push({
        akkordeonId,
        buttonId: questionButton.id,
        question: questionText,
        answer: answerText,
        qPlusA: questionText + " " + answerText,
        tokens: tokenizeText(answerText)
      });
    });
  });
  return result;
}
/**
 * Rebuild an embeddings object so that every value is a Float32Array.
 * Needed because stored embeddings come back as plain JSON values.
 * @param {Object} inputEmbeddings - maps a key to an array, or to an object whose
 *   values are the vector components
 * @returns {Object} new object with the same keys and Float32Array values
 */
export function convertToFloat32(inputEmbeddings) {
  return Object.keys(inputEmbeddings).reduce((converted, name) => {
    const components = Object.values(inputEmbeddings[name]);
    converted[name] = new Float32Array(components);
    return converted;
  }, {});
}
\ No newline at end of file
/**
 * Open the user's mail client with a prewritten feedback message and stats by
 * navigating the window to a mailto: link.
 * @param {string} query - search query the feedback refers to
 * @param {string} bestToken - text snippet that was presented for the query
 * @param {string} feedback - 'up' or 'down'; any other value is ignored so that
 *   no empty mail is opened
 */
export function submitFeedback(query, bestToken, feedback) {
  /** temporary mail address, adjust if needed */
  const feedbackAddress = 'huepfen-richtlinie.7r@icloud.com';
  const templates = {
    up: {
      subject: 'Positive Feedback: FAQ Search Feature',
      intro: 'I used your FAQ search feature and was pleased with the result. Details below:',
      closing: 'Keep it up!',
    },
    down: {
      subject: 'Negative Feedback: FAQ Search Feature',
      intro: 'I tried your FAQ search feature, and unfortunately, I was not satisfied with this result. Details below:',
      closing: 'I hope this feedback lets you improve the experience.',
    },
  };
  /** own-property check so that names such as "constructor" do not select a template */
  if (!Object.prototype.hasOwnProperty.call(templates, feedback)) {
    return;
  }
  const { subject, intro, closing } = templates[feedback];
  const timestamp = new Date().toISOString();
  /** paragraphs are separated by one blank line */
  const message = [
    'Hello,',
    intro,
    `Query: ${query}\nBest Token: ${bestToken}\nFeedback: ${feedback}\nTimestamp: ${timestamp}`,
    'I also have some additional suggestions:',
    closing,
    'Best regards',
  ].join('\n\n');
  const mailtoLink = `mailto:${feedbackAddress}?subject=${encodeURIComponent(subject)}&body=${encodeURIComponent(message)}`;
  window.location.href = mailtoLink;
}
/**
 * Highlight a text passage by navigating the outermost window to its own URL
 * (origin + path + query) with a `#:~:text=` fragment appended.
 * Per the original note: the URL highlights stay until the page is refreshed,
 * which is not ideal, so this feature might be left out.
 * @param {string} textToHighlight - passage to encode into the fragment
 */
export function highlightText(textToHighlight) {
  const encodedText = encodeURIComponent(textToHighlight);
  /** climb the frame hierarchy until a window is its own parent */
  let outermost = window;
  while (outermost.parent !== outermost) {
    outermost = outermost.parent;
  }
  const { origin, pathname, search } = outermost.location;
  outermost.location.href = `${origin}${pathname}${search}#:~:text=${encodedText}`;
}
import { pipeline, env } from './transformers.js';
/**
 * Configure the `env` object imported from ./transformers.js. The author's stated
 * goal is that nothing is loaded externally: remote models are switched off and
 * models and wasm files are addressed through relative paths.
 * NOTE(review): the exact effect of each flag is defined by transformers.js - confirm there.
 */
env.allowRemoteModels = false;
env.useBrowserCache = true;
env.localModelPath = './models/';
env.backends.onnx.wasm.wasmPaths = './wasm/'
/** embedding pipeline; stays undefined until initializeModel() has resolved */
let embedder;
/** caches mapping input text to its embedding; used by computeEmbedding(), */
/** computeTokenEmbedding() and computeQueryEmbedding() respectively */
let embeddingCache = {};
let tokenEmbeddingCache = {};
let queryEmbeddingCache = {};
/**
 * Get the extraction model ready: create the feature-extraction pipeline and
 * store it in the module-level `embedder`. Must resolve before any embedding
 * function is used.
 * @returns {Promise<void>}
 */
export async function initializeModel() {
  const task = 'feature-extraction';
  const modelId = 'Xenova/all-MiniLM-L6-v2';
  embedder = await pipeline(task, modelId);
}
/**
 * Cosine similarity of two numeric vectors (plain arrays or typed arrays).
 * Based on https://www.restack.io/p/similarity-search-answer-javascript-cosine-similarity-cat-ai
 * @param {ArrayLike<number>} vectorA
 * @param {ArrayLike<number>} vectorB - expected to have the same length as vectorA
 * @returns {number} dot(A, B) / (|A| * |B|); 0 when either vector has zero
 *   magnitude, so that callers sorting by score never receive NaN from 0/0
 */
export function cosineSimilarity(vectorA, vectorB) {
  let dotProduct = 0;
  let squaredA = 0;
  for (let i = 0; i < vectorA.length; i++) {
    dotProduct += vectorA[i] * vectorB[i];
    squaredA += vectorA[i] * vectorA[i];
  }
  let squaredB = 0;
  for (let i = 0; i < vectorB.length; i++) {
    squaredB += vectorB[i] * vectorB[i];
  }
  const denominator = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
/** various functions to calculate the right embeddings */
/** -------------------------------------------------- */
/**
 * Round every component of an embedding to three decimal places.
 * @param {{data: Array<number>|Float32Array}} embedding - object whose `data`
 *   holds the vector components
 * @returns {Array<number>|Float32Array} result of `data.map`, so the container
 *   type follows that of `data`
 */
function roundEmbedding(embedding) {
  const toThreeDecimals = (component) => Number(component.toFixed(3));
  const { data } = embedding;
  return data.map(toThreeDecimals);
}
/**
 * Embed a text with the module-level `embedder` (mean pooling, normalized) and
 * round the result with roundEmbedding.
 * @param {string} text - text to embed
 * @returns {Promise<*>} whatever roundEmbedding returns for the embedder output
 */
export async function getAndRoundEmbedding(text) {
  const embedOptions = { pooling: 'mean', normalize: true };
  const rawEmbedding = await embedder(text, embedOptions);
  return roundEmbedding(rawEmbedding);
}
/**
 * Embedding of a text, served from / stored in the module-level `embeddingCache`.
 * @param {string} text
 * @returns {Promise<*>} value produced by getCachedEmbedding
 */
export async function computeEmbedding(text) {
  const vector = await getCachedEmbedding(text, embeddingCache);
  return vector;
}
/**
 * Embedding of a token (text snippet), served from / stored in the module-level
 * `tokenEmbeddingCache`.
 * @param {string} text
 * @returns {Promise<*>} value produced by getCachedEmbedding
 */
export async function computeTokenEmbedding(text) {
  const vector = await getCachedEmbedding(text, tokenEmbeddingCache);
  return vector;
}
/**
 * Embedding of a search query, served from / stored in the module-level
 * `queryEmbeddingCache`.
 * @param {string} text
 * @returns {Promise<*>} value produced by getCachedEmbedding
 */
export async function computeQueryEmbedding(text) {
  const vector = await getCachedEmbedding(text, queryEmbeddingCache);
  return vector;
}
/**
 * Compute embeddings for one field of every FAQ entry.
 * @param {Array<Object>} textData - FAQ entries
 * @param {string} type - name of the entry property to embed
 * @returns {Promise<Array>} embeddings in the same order as textData
 */
export async function calcFAQEmbeddings(textData, type) {
  const pending = textData.map(({ [type]: fieldText }) => computeEmbedding(fieldText));
  const embeddings = await Promise.all(pending);
  return embeddings;
}
/**
 * Embed a text with the module-level `embedder` (mean pooling, normalized)
 * without rounding and without caching.
 * @param {string} text - text to embed
 * @returns {Promise<*>} the `data` property of the embedder output
 */
export async function getEmbedding(text) {
  const { data } = await embedder(text, { pooling: 'mean', normalize: true });
  return data;
}
/** -------------------------------------------------- */
/**
 * Return the embedding stored in `cache` for `text`; if there is none, calculate
 * it with getAndRoundEmbedding, store it and return it.
 * @param {string} text - text to embed; also used as the cache key
 * @param {Object} cache - plain object mapping text to embedding, updated in place
 * @returns {Promise<*>} cached or freshly calculated embedding
 */
export async function getCachedEmbedding(text, cache) {
  /** own-property check: a plain lookup would also hit inherited members, e.g. */
  /** cache['constructor'] is a function and must not be returned as an embedding */
  if (Object.prototype.hasOwnProperty.call(cache, text) && cache[text]) {
    return cache[text];
  }
  const emb = await getAndRoundEmbedding(text);
  cache[text] = emb;
  return emb;
}
/**
 * Accessor for the module-level `embeddingCache`.
 * @returns {Object} the cache object itself (not a copy)
 */
export function getEmbeddingCache() {
return embeddingCache;
}
/**
 * Accessor for the module-level `tokenEmbeddingCache`.
 * @returns {Object} the cache object itself (not a copy)
 */
export function getTokenEmbeddingCache() {
return tokenEmbeddingCache;
}
/**
 * Replace both module-level caches, e.g. with embeddings restored from storage.
 * @param {Object} newEmbeddingCache - becomes `embeddingCache`
 * @param {Object} newTokenEmbeddingCache - becomes `tokenEmbeddingCache`
 */
export function setCaches(newEmbeddingCache, newTokenEmbeddingCache) {
  [embeddingCache, tokenEmbeddingCache] = [newEmbeddingCache, newTokenEmbeddingCache];
}
/**
 * Turn a text snippet into "tokens": the text is split after '.', '!' or '?'
 * followed by whitespace, and consecutive sentences are merged (joined by one
 * space) until a chunk is at least 10 characters long. The final chunk is
 * emitted whatever its length.
 * Could still be optimized with more elaborate regular expressions; good for now.
 * @param {string} text - text to split
 * @returns {Array<string>} chunks in document order; empty for an empty text
 */
export function tokenizeText(text) {
  const sentences = text.split(/(?<=[.!?])\s+/).filter(part => part.length > 0);
  const lastPosition = sentences.length - 1;
  const chunks = [];
  let pending = [];
  sentences.forEach((sentence, position) => {
    pending.push(sentence);
    const merged = pending.join(" ");
    if (merged.length >= 10 || position === lastPosition) {
      chunks.push(merged);
      pending = [];
    }
  });
  return chunks;
}
/**
 * Get token embeddings ready for later use in realtime search: tokenize every
 * answer and call computeTokenEmbedding once per distinct token.
 * @param {Array<{answer: string}>} textData - FAQ entries
 * @returns {Promise<void>} resolves when all token embeddings are computed
 */
export async function precalcTokenEmbeddings(textData) {
  const tokensPerEntry = textData.map(entry => tokenizeText(entry.answer));
  const distinctTokens = [...new Set(tokensPerEntry.flat())];
  const pending = distinctTokens.map(token => computeTokenEmbedding(token));
  await Promise.all(pending);
}
/**
 * Score every FAQ entry by the cosine similarity between the query embedding and
 * the entry's embedding, and return the best matches.
 * @param {string} query - user query
 * @param {Array<Object>} textData - FAQ entries; index i belongs to faqEmbeddings[i]
 * @param {Array} faqEmbeddings - one embedding per FAQ entry
 * @param {number} [topK=3] - number of results to return
 * @returns {Promise<Array<Object>>} up to topK results sorted by descending score:
 *   { question, answer, buttonId, akkordeonId, qPlusA, token, score }
 */
export async function semanticSearch(query, textData, faqEmbeddings, topK = 3) {
  const queryEmbedding = await computeQueryEmbedding(query);
  return faqEmbeddings
    .map((emb, index) => {
      const { question, answer, buttonId, akkordeonId, qPlusA, token, tokens } = textData[index];
      return {
        question,
        answer,
        buttonId,
        akkordeonId,
        qPlusA,
        /** extracted FAQ entries store their snippets under `tokens`; reading only */
        /** `token` left this field undefined for every result */
        token: token ?? tokens,
        score: cosineSimilarity(queryEmbedding, emb),
      };
    })
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
}
/**
 * Find the token (text chunk) of `text` whose embedding is most similar to the
 * query embedding. Tokens are embedded one after another; on equal scores the
 * earlier token wins.
 * @param {string} query - user query
 * @param {string} text - text to tokenize and search
 * @returns {Promise<{bestToken: string|null}>} best token, or null if the text
 *   yields no tokens
 */
export async function findBestToken(query, text) {
  const candidates = tokenizeText(text);
  const queryVector = await computeQueryEmbedding(query);
  const winner = { token: null, score: -Infinity };
  for (let i = 0; i < candidates.length; i += 1) {
    const candidateVector = await computeTokenEmbedding(candidates[i]);
    const similarity = cosineSimilarity(queryVector, candidateVector);
    if (similarity > winner.score) {
      winner.token = candidates[i];
      winner.score = similarity;
    }
  }
  return { bestToken: winner.token };
}
Source diff could not be displayed: it is too large. Options to address this: view the blob.
import { highlightText } from "./highlightText.js";
import { submitFeedback } from "./feedback.js";
/**
 * Add a message to the chat container (#messages) and scroll it to the bottom.
 * A 'user-message' is shown as a plain text bubble. Any other class name is
 * shown as a bot response: marker, text, a clickable info panel that jumps to
 * the FAQ panel, and "+" / "-" feedback buttons.
 * @param {string} content - text to display
 * @param {string} className - CSS class of the text element; 'user-message'
 *   selects the plain layout
 * @param {string|null} [linkId=null] - id of the FAQ button, passed to linkToPanel
 * @param {string|null} [akkordeonId=null] - id of the accordion group, passed to linkToPanel
 * @param {string|null} [bestToken=null] - best matching snippet for the query
 * @param {string|null} [query=null] - original query, included in feedback
 */
export function appendMessage(content, className, linkId = null, akkordeonId = null, bestToken = null, query = null) {
  /** small factory: a <div> with text and CSS class */
  const makeDiv = (text, cssClass) => {
    const div = document.createElement('div');
    div.textContent = text;
    div.className = cssClass;
    return div;
  };
  /** small factory: a feedback button reporting the given verdict */
  const makeFeedbackButton = (label, cssClass, verdict) => {
    const button = document.createElement('button');
    button.innerHTML = label;
    button.className = cssClass;
    button.onclick = () => {
      submitFeedback(query, bestToken, verdict);
    };
    return button;
  };

  const wrapper = document.createElement('div');
  if (className === 'user-message') {
    wrapper.appendChild(makeDiv(content, className));
  } else {
    /** drawing what chat response looks like */
    wrapper.className = 'bot-message-container';

    const infoPanel = makeDiv('', 'info-panel');
    infoPanel.onclick = () => {
      linkToPanel(linkId, akkordeonId, bestToken);
    };

    const feedbackBox = document.createElement('div');
    feedbackBox.className = 'feedback-container';
    feedbackBox.appendChild(makeFeedbackButton('+', 'feedback-button thumbs-up', 'up'));
    feedbackBox.appendChild(makeFeedbackButton('-', 'feedback-button thumbs-down', 'down'));

    const parts = [makeDiv("⦿", 'number-box'), makeDiv(content, className), infoPanel, feedbackBox];
    for (const part of parts) {
      wrapper.appendChild(part);
    }
  }
  const chatLog = document.getElementById('messages');
  chatLog.appendChild(wrapper);
  chatLog.scrollTop = chatLog.scrollHeight;
}
/**
 * Read the query from the #user-input field and start a search: the trimmed text
 * is shown as a user message, the field is cleared and a 'semanticSearch'
 * request is posted to the worker. Blank input is ignored.
 * @param {{postMessage: Function}} worker - worker performing the search
 */
export function sendMessage(worker) {
  const inputField = document.getElementById('user-input');
  const query = inputField.value.trim();
  if (query === "") {
    return;
  }
  appendMessage(query, 'user-message');
  inputField.value = '';
  worker.postMessage({ act: 'semanticSearch', query });
}
/** scrolls to the relavant panel and shows hidden answer text */
export function linkToPanel(buttonId, akkordeonId, bestToken) {
const selector = (window !== window.parent.parent) ? window.parent.parent.document : document;
const parentContainer = selector.querySelector(`#${akkordeonId}`);
const panelButton = parentContainer.querySelector(`#${buttonId}`);
if (panelButton.classList.contains('collapsed')) {
panelButton.click();
}
panelButton.scrollIntoView({ behavior: 'smooth', block: 'center' });
/** might leave highlighing out; works, but highlights are persistent until DOM is reloaded */
setTimeout(() => { highlightText(bestToken); }, 1000);
}
/** expose linkToPanel as a property of the global window object; per the original */
/** note this lets results shown in the results iframe scroll to the right panel */
window.linkToPanel = linkToPanel;
import { appendMessage } from './ui.js';
let processQueryCache = null;
/**
 * Handle a message sent by the search worker. Depending on `events.data.act`:
 * - 'initialized': persist the embedding cache under 'largeEmbedding'
 * - 'tokeninit': persist the token embedding cache under 'smallEmbedding'
 * - 'searchResults': show every result as a bot message in the chat
 * - 'topResults': hand at most three results to the callback registered by
 *   processQuery, then clear that callback
 * Other values are ignored.
 * @param {{data: Object}} events - message event; `data` may carry act, results,
 *   embeddingCache, tokenEmbeddingCache and query
 */
export function handleWorker(events){
  const { act, results, embeddingCache: updatedCache, tokenEmbeddingCache: updatedTokenCache, query } = events.data;
  /** persisting is best effort: a full or disabled storage must not break the */
  /** message handler, and an absent cache must not be stored as "undefined" */
  const persist = (storageKey, value) => {
    if (value === undefined) {
      return;
    }
    try {
      localStorage.setItem(storageKey, JSON.stringify(value));
    } catch (err) {
      console.warn(`Could not store "${storageKey}" in localStorage`, err);
    }
  };
  const resultList = results ?? [];
  switch (act) {
    case 'initialized':
      persist('largeEmbedding', updatedCache);
      break;
    case 'tokeninit':
      persist('smallEmbedding', updatedTokenCache);
      break;
    case 'searchResults':
      /** pass our three results to ui so they can be displayed and worked with in chat */
      resultList.forEach(result => {
        appendMessage(
          result.question,
          'bot-message',
          result.buttonId,
          result.akkordeonId,
          result.bestToken,
          query
        );
      });
      break;
    case 'topResults':
      /** show results live in result iframe */
      if (processQueryCache) {
        const outputs = resultList.slice(0, 3).map(result => ({
          question: result.question,
          bestToken: result.bestToken,
          buttonId: result.buttonId,
          akkordeonId: result.akkordeonId,
        }));
        processQueryCache(outputs);
        processQueryCache = null;
      }
      break;
  }
}
/**
 * Request live results for a query: remember the callback that should receive
 * the next 'topResults' answer, then post a 'resultsSemantic' request to the worker.
 * A callback registered earlier and not yet called is replaced.
 * @param {string} query - query text
 * @param {{postMessage: Function}} worker - worker performing the search
 * @param {Function} cache - callback invoked by handleWorker with the top results
 */
export function processQuery(query, worker, cache) {
  processQueryCache = cache;
  const request = { act: 'resultsSemantic', query };
  worker.postMessage(request);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment