feat: add readme #2

Merged · 1 commit · Apr 16, 2024
23 changes: 23 additions & 0 deletions README.md
@@ -1,3 +1,26 @@
# Lambda Layer for the Python NLTK package

Credit to https://github.com/customink/lambda-python-nltk-layer

Lambda layer that makes the popular NLTK Python package available to AWS Lambda functions.

Works with Lambda functions packaged either as Docker images or as Zip packages.
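
Regardless of packaging, the function code can import NLTK directly once the layer's data is visible via `NLTK_DATA`. A minimal handler sketch (not part of this repository; the handler name, event key, and response shape are assumptions):

```python
# Minimal sketch of a Lambda handler using NLTK via the layer's data.
# The event key "text" and the response shape are illustrative assumptions.
from nltk.tokenize import word_tokenize

def handler(event, context):
    text = event.get("text", "")
    return {"tokens": word_tokenize(text)}
```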

### Lambda functions packaged as Docker Images or OCI Images

To use the NLTK layer with Docker images, package your app (e.g. an HTTP API) in a Dockerfile and add one line that copies the NLTK data files to `/opt/nltk_data` inside your container.

Pre-built NLTK data is provided in the public ECR repository `public.ecr.aws/m5s2b0d4/nltk_lambda_layer`:

```dockerfile
COPY --from=public.ecr.aws/m5s2b0d4/nltk_lambda_layer:latest /nltk_data /opt/nltk_data
```

Then add one line to set the `NLTK_DATA` environment variable:

```dockerfile
ENV NLTK_DATA=/opt/nltk_data
```

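With the data in place and `NLTK_DATA` set, NLTK resolves its corpora from `/opt/nltk_data` at runtime with no `nltk.download` call. A quick sanity check (a hedged sketch, not part of this repository) that can be run inside the container:

```python
# Verify that NLTK sees the layer's data directory.
import nltk
from nltk.corpus import stopwords

print(nltk.data.path)                  # should include /opt/nltk_data
print(stopwords.words('english')[:5])  # loads from the copied corpus
```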

Below is a Dockerfile for [an example Python application](examples/Dockerfile).
12 changes: 12 additions & 0 deletions examples/Dockerfile
@@ -0,0 +1,12 @@
FROM public.ecr.aws/docker/library/python:3.12.0-slim-bullseye

COPY --from=public.ecr.aws/m5s2b0d4/nltk_lambda_layer:preview /nltk_data /opt/nltk_data

# LAMBDA_TASK_ROOT is predefined (as /var/task) only in the AWS Lambda base images,
# so define it here and use it as the working directory.
ENV LAMBDA_TASK_ROOT=/var/task
WORKDIR ${LAMBDA_TASK_ROOT}

# Install dependencies first, then copy the function code from your project folder.
COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" -U --no-cache-dir
COPY . ${LAMBDA_TASK_ROOT}

ENV NLTK_DATA=/opt/nltk_data
CMD ["python", "main.py"]
21 changes: 21 additions & 0 deletions examples/main.py
@@ -0,0 +1,21 @@
# Requires the 'stopwords' corpus and the Punkt tokenizer data, which the layer
# provides under /opt/nltk_data (resolved through the NLTK_DATA environment variable).
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

example_sent = """This is a sample sentence,
showing off the stop words filtration."""

stop_words = set(stopwords.words('english'))

word_tokens = word_tokenize(example_sent)

# Lower-case each token before checking whether it is a stop word.
filtered_lowercase = [w for w in word_tokens if w.lower() not in stop_words]

# The same filter without lower-case conversion.
filtered_sentence = []
for w in word_tokens:
    if w not in stop_words:
        filtered_sentence.append(w)

print(word_tokens)
print(filtered_lowercase)
print(filtered_sentence)
1 change: 1 addition & 0 deletions examples/requirements.txt
@@ -0,0 +1 @@
nltk