From 250adee628851a2e28e779765feaccf454b2bd2c Mon Sep 17 00:00:00 2001 From: Ken Kahn Date: Mon, 29 Jan 2024 12:30:14 +0800 Subject: [PATCH] Complete draft of embedding active essay --- apps/embeddings/3D/index.html | 73 +++-- apps/embeddings/3D/v4.html | 548 ++++++++++++++++++++++++++++++++ apps/embeddings/index.html | 239 ++++++++++++++ apps/embeddings/star/index.html | 59 +++- apps/embeddings/star/v5.html | 424 ++++++++++++++++++++++++ 5 files changed, 1298 insertions(+), 45 deletions(-) create mode 100644 apps/embeddings/3D/v4.html create mode 100644 apps/embeddings/index.html create mode 100644 apps/embeddings/star/v5.html diff --git a/apps/embeddings/3D/index.html b/apps/embeddings/3D/index.html index 2a0843e..ff9a3a6 100644 --- a/apps/embeddings/3D/index.html +++ b/apps/embeddings/3D/index.html @@ -35,7 +35,7 @@ } /* Style adjustments for the button to make it align to the right of the word input */ - .word-input-group { +.word-input-group { display: flex; justify-content: flex-start; /* Align items to the start */ align-items: center; @@ -76,20 +76,21 @@ } /* Adjust the dropdown width as needed */ - #embeddingsList { +#embeddingsList { min-width: 140px; /* Match the width of the other buttons */ width: auto; /* Set a fixed width or auto */ flex-grow: 0; /* Prevent the dropdown from growing */ } - .container { - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + max-width: 800px; /* Limit the maximum width */ + margin: auto; /* Center the container */ } - + .input-group, .word-input-group, .button-group { @@ -100,7 +101,7 @@ } /* Button styles */ - .button { +.button { padding: 10px 20px; /* Adjust padding for the button */ background-color: #4CAF50; color: white; @@ -155,12 +156,13 @@ /* Modal Content */ .modal-content { - background-color: #fefefe; - margin: 15% auto; /* 15% from the top and centered */ - padding: 20px; - border: 1px solid #888; - width: 80%; /* Could be more or less, depending on screen size */ - border-radius: 5px; + background-color: #fefefe; + margin: 15% auto; /* 15% from the top and centered */ + padding: 20px; + border: 1px solid #888; + width: 80%; /* Could be more or less, depending on screen size */ + max-width: 600px; /* Set a maximum width for the modal */ + border-radius: 5px; } /* The Close Button */ @@ -178,6 +180,12 @@ cursor: pointer; } +/* Additional style for the canvas */ +#visualization canvas { + max-width: 100%; /* Ensure canvas does not exceed the container width */ + height: 600px; +} + @@ -215,19 +223,24 @@ @@ -252,8 +265,8 @@

Instructions:

]; // Validate input - if (word === '' || values.some(isNaN)) { - alert('Please enter a word and specify values for all dimensions.'); + if (word === '' || values.some(isNaN) || values.some(value => value < 0 || value > 9)) { + alert('Please enter a word and specify values between 0 and 9 for all dimensions.'); return; } diff --git a/apps/embeddings/3D/v4.html b/apps/embeddings/3D/v4.html new file mode 100644 index 0000000..ff9a3a6 --- /dev/null +++ b/apps/embeddings/3D/v4.html @@ -0,0 +1,548 @@ + + + + Word Embedding Visualization + + + + + + +
+ +
+ + +
+
+ + +
+
+ + +
+ + +
+ + +
+ +
+ + + +
+ + + + + + + + +
+
+ + + \ No newline at end of file diff --git a/apps/embeddings/index.html b/apps/embeddings/index.html new file mode 100644 index 0000000..d72c4a6 --- /dev/null +++ b/apps/embeddings/index.html @@ -0,0 +1,239 @@ + + + + + Word and Sentence Embeddings + + + + +
+
+

Language through the Lens of AI: The Story of Embeddings

+

+ Authored by + Ken Kahn +
+ Contact: + toontalk@gmail.com +

+
+
+ +
+

+ In the world of natural language processing,
+ embeddings transform words and sentences into sequences of numbers,
+ giving computers a way to compare and reason about meaning.

+

+ This technology powers tools like Siri and Alexa, + and translation services like Google Translate. +

+

+ Generative AI systems, such as ChatGPT, Bard, and DALL-E, + leverage these embeddings to understand and generate human-like text, + create art, or answer complex queries. + These advancements showcase the pivotal role of embeddings + in bridging human communication with machine intelligence. +

+
+ +
+

Hand-Crafted Embeddings

+

+ In the early days of language processing, before the advent of advanced machine learning techniques,
+ embeddings were meticulously crafted by hand.
+ Linguists and computer scientists collaborated to create these embeddings,
+ mapping each word into a numerical space based on its meaning and context.
+ This process involved analyzing the relationships between words and manually assigning values to capture those relationships.

+

+ For example, words with similar meanings would be placed close together in this numerical space, + while those with different meanings would be positioned further apart. + This method, though innovative, had its limitations. + It was time-consuming and could not easily adapt to the nuances of language and evolving vocabulary. + However, these early endeavors laid the groundwork for the more sophisticated, + automated embedding techniques that are used in NLP today. +

+

+ The 3D hand-crafted embedding app, which can be explored interactively below, provides a tangible experience of this concept.
+ Users are invited to input words along with numerical values, each between 0 and 9, in three dimensions.
+ For instance, in the context of animals, dimensions such as size, life span, and friendliness can be explored.
+ This interactive visualization shows how words or concepts are positioned relative to each other in a defined space,
+ offering an engaging introduction to the foundational ideas behind embeddings.
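+ As a rough sketch of what such hand-crafted embeddings amount to (the names and values below are
+ illustrative, not the app's actual code), each word is just a short array of numbers,
+ and "closeness" can be measured with a simple distance function:
+
+ // A hypothetical hand-crafted embedding table: one value per dimension,
+ // here [size, life span, friendliness], each between 0 and 9.
+ const embeddings = {
+   cat:      [2, 3, 6],
+   dog:      [3, 3, 8],
+   elephant: [9, 8, 5]
+ };
+
+ // Euclidean distance: smaller means the two words sit closer together.
+ function distance(a, b) {
+   return Math.sqrt(a.reduce((sum, value, i) => sum + (value - b[i]) ** 2, 0));
+ }
+
+ console.log(distance(embeddings.cat, embeddings.dog));      // small: similar animals
+ console.log(distance(embeddings.cat, embeddings.elephant)); // larger: less similar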

+
+ +
+
+ +
+

Visualizing Embeddings: The Star Approach

+

+ The star visualization method offers an intuitive way to understand word embeddings.
+ In this approach, each element of an embedding vector is represented as a line originating from a central point, creating a pattern akin to a star.
+ Each element is given its own direction around the circle, and the length of its line reflects that element's value,
+ bringing a tangible visual form to complex data.
+ This visualization not only makes it easier to interpret the multidimensional aspects of language but also adds a layer of aesthetic appeal to the study of linguistics.
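+ A minimal sketch of the idea (not the essay's actual drawing code, and assuming values in the
+ 0 to 9 range used by the hand-crafted app): give each element of the vector its own angle
+ around a circle and draw a ray whose length is proportional to that element's value.
+
+ // Draw a "star" for one embedding vector on a canvas 2D context.
+ function drawStar(ctx, values, centerX, centerY, maxRadius, maxValue = 9) {
+   values.forEach((value, i) => {
+     const angle = (2 * Math.PI * i) / values.length;   // fixed direction per dimension
+     const length = (value / maxValue) * maxRadius;     // ray length encodes the value
+     ctx.beginPath();
+     ctx.moveTo(centerX, centerY);
+     ctx.lineTo(centerX + length * Math.cos(angle),
+                centerY + length * Math.sin(angle));
+     ctx.stroke();
+   });
+ }
+
+ // Usage: drawStar(someCanvas.getContext('2d'), [2, 3, 6], 200, 200, 150);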

+

+ The interactive element below offers a unique opportunity to visualize the embeddings you've created in the previous app.
+ Before proceeding, ensure you save your hand-crafted embeddings using the 'Save Embeddings' feature.
+ You can then load and explore these embeddings here, witnessing how your definitions translate into a dynamic visual pattern.
+ This continuity between the apps enhances your understanding of embeddings and their practical visualization.

+
+ +
+
+ +
+

Machine Learning-Generated Word Embeddings

+

+ The advent of machine learning models like Word2Vec and GloVe marked a significant milestone in the evolution of word embeddings. + These models revolutionized the way computers understand human language by automatically generating word embeddings from large text datasets. + Unlike hand-crafted embeddings, these machine learning-based approaches can capture a vast array of linguistic nuances, enabling a deeper understanding of language semantics and syntax. + The embeddings generated by these models reflect the contextual relationships and associations that words share within a language. +

+

+ This advancement has profoundly impacted various applications in NLP, from enhancing search engine algorithms to improving the accuracy of voice recognition systems. + The ability of these models to process and analyze vast amounts of text data has opened new avenues in language understanding, making technology more intuitive and responsive to human communication. +

+

+ Among these innovations is the Universal Sentence Encoder (USE), developed by researchers at Google. + USE extends the concept of word embeddings to entire sentences, + providing a more nuanced and comprehensive representation of language. + By analyzing large text corpora, USE captures the essence of sentences, facilitating tasks like text classification, + semantic similarity assessment, and clustering. + The interactive element below utilizes USE to visualize sentence embeddings, + illustrating the sophisticated capabilities of modern NLP techniques. +
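+ The interactive element below is built on USE; in the browser, USE can be loaded with
+ TensorFlow.js roughly as follows (a sketch assuming the standard
+ @tensorflow-models/universal-sentence-encoder browser bundle is included;
+ the essay's own app may be wired differently):
+
+ // Requires the @tensorflow/tfjs and @tensorflow-models/universal-sentence-encoder
+ // browser bundles (for example, loaded from a CDN), which expose a global `use` object.
+ async function embedSentences(sentences) {
+   const model = await use.load();                  // download the pretrained encoder
+   const embeddings = await model.embed(sentences); // a tensor of sentence vectors
+   return embeddings.array();                       // one 512-number vector per sentence
+ }
+
+ embedSentences(['The cat sat on the mat.', 'A kitten rested on the rug.'])
+   .then(vectors => console.log(vectors[0].length)); // 512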

+
+ +
+
+ +
+

Exploring Embeddings with TensorFlow Projector

+

+ TensorFlow Projector is an advanced tool that allows for an interactive exploration of high-dimensional data, + such as word and sentence embeddings. + It provides a visual platform where embeddings can be plotted in 3D or 2D space, + offering insights into how machine learning models perceive and organize linguistic elements. + Users can navigate through this space, observe clusters and relationships between words or sentences, + and gain a deeper understanding of how embeddings capture the nuances of language. +

+

+ This tool exemplifies the power of embeddings in machine learning, + showcasing the intricate patterns and structures that emerge from large-scale language data. + By using TensorFlow Projector, users can visually dissect the complex landscape of embeddings, + making abstract concepts more tangible and comprehensible. +

+

+ The Projector is best explored in a full-size window for a more immersive experience. Launch TensorFlow Projector. +
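+ If you want to explore your own embeddings there, the Projector's "Load" dialog accepts two
+ tab-separated files: one with the vectors and one with the labels. A small sketch of preparing
+ such files in the browser (not part of this essay's apps):
+
+ // Build the two TSV files the Projector can load:
+ // vectors.tsv  - one embedding per line, values separated by tabs
+ // metadata.tsv - one label (word or sentence) per line, in the same order
+ function toProjectorFiles(labels, vectors) {
+   return {
+     vectorsTsv: vectors.map(v => v.join('\t')).join('\n'),
+     metadataTsv: labels.join('\n')
+   };
+ }
+
+ // Offer one of the generated files as a download.
+ function download(filename, text) {
+   const link = document.createElement('a');
+   link.href = URL.createObjectURL(new Blob([text], { type: 'text/tab-separated-values' }));
+   link.download = filename;
+   link.click();
+ }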

+
+ +
+
+ +
+

The Power of Sentence Embeddings

+

+ One fascinating aspect of word embeddings is their ability to perform arithmetic operations. + This capability allows for intriguing applications such as solving analogies or understanding word relationships. + For instance, by manipulating the embeddings, it's possible to discover that adding 'king' to 'woman' and subtracting 'man' results in an embedding similar to 'queen'. + Such operations demonstrate the nuanced understanding these models have of word meanings and relationships. +
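+ A sketch of the arithmetic behind this example, using cosine similarity to compare the result
+ against candidate words (the vectors themselves would have to come from a trained model such as
+ Word2Vec or USE):
+
+ // Element-wise arithmetic on embedding vectors of equal length.
+ const add      = (a, b) => a.map((value, i) => value + b[i]);
+ const subtract = (a, b) => a.map((value, i) => value - b[i]);
+
+ // Cosine similarity: close to 1 for vectors pointing in the same direction.
+ function cosineSimilarity(a, b) {
+   const dot  = a.reduce((sum, value, i) => sum + value * b[i], 0);
+   const norm = v => Math.sqrt(v.reduce((sum, value) => sum + value * value, 0));
+   return dot / (norm(a) * norm(b));
+ }
+
+ // With real embeddings for king, man, woman and queen:
+ //   const result = add(subtract(king, man), woman);
+ //   cosineSimilarity(result, queen) should be higher than for most other candidate words.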

+

+ Moreover, when visualizing the difference between two similar words, sentences, or expressions using the star visualization,
+ we expect to see many short rays: the closer the meanings or contexts of the expressions,
+ the smaller the element-wise differences in their embeddings, and the shorter the resulting rays.
+ This visual pattern is a useful tool for exploring and understanding linguistic similarities.

+
+ +
+
+ +
+

Further Reading and References

+

+ To delve deeper into the world of NLP and embeddings, consider exploring additional resources and academic papers. + These materials can offer a more in-depth understanding of the theories and practical applications of NLP, + including the latest advancements and research findings. + Academic journals, online courses, and specialized blogs in this field are great places to start for those interested in furthering their knowledge. +

+

+ To learn more about word embeddings, visit the Wikipedia page on Word Embeddings. +

+
+ +
+

Behind the Scenes: Creating This Active Essay and Apps

+

+ The development of this active essay and the accompanying apps was an iterative and collaborative process. + It involved a detailed exploration of natural language processing and embeddings, + followed by the design and implementation of interactive web applications to visualize these concepts. + Throughout the journey, key topics such as hand-crafted embeddings, machine learning-generated embeddings, + and the novel star visualization approach were explored and integrated into the essay. + This process, a blend of technical development and educational content creation, + is documented in further detail in a discussion which can be explored here. +

+
+ + + + \ No newline at end of file diff --git a/apps/embeddings/star/index.html b/apps/embeddings/star/index.html index f7972f9..07471ef 100644 --- a/apps/embeddings/star/index.html +++ b/apps/embeddings/star/index.html @@ -65,10 +65,12 @@ .modal-content { background-color: #fefefe; - margin: 5% auto; + margin: 15% auto; /* 15% from the top and centered */ padding: 20px; border: 1px solid #888; - width: 80%; + width: 80%; /* Could be more or less, depending on screen size */ + max-width: 600px; /* Set a maximum width for the modal */ + border-radius: 5px; } .close { @@ -85,9 +87,16 @@ cursor: pointer; } +iframe { + width: 100%; /* This ensures the iframe takes the full width of its container */ + max-width: 800px; /* Adjust this value to match the width of your other content */ + height: 600px; /* Adjust height as needed */ + border: none; + margin-top: 20px; + margin-bottom: 20px; +} - @@ -110,7 +119,7 @@ - + \ No newline at end of file diff --git a/apps/embeddings/star/v5.html b/apps/embeddings/star/v5.html new file mode 100644 index 0000000..07471ef --- /dev/null +++ b/apps/embeddings/star/v5.html @@ -0,0 +1,424 @@ + + + + Star Visualization of Embeddings + + + + + + + + +
+ + +
+
+ + +
+ + + + + + + + + \ No newline at end of file