diff --git a/reinvent-2019/README.md b/reinvent-2019/README.md index 7c81224d..342193fa 100644 --- a/reinvent-2019/README.md +++ b/reinvent-2019/README.md @@ -21,6 +21,7 @@ A collection of templates, sample code and documentation from the re:Invent 2019 | [Polyglot Bot](polyglot-bot) | A greeter bot that recognizes your greeting language and wish you back in the same language with hand gesture | | [Puente](puente) | Puente is a machine learning model that recognizes static American Sign Language letters | | [Rhythm Cloud](rhythm-cloud) | Learn to play the drums and learn AWS IoT | +| [Sign & Speak](sign-and-speak) | A communication tool for users of sign language and users of spoken language, allowing for simple conversation through the use of AI/ML | | [Skittles Sorter](skittle-sorter) | The Skittles Sorter automagically sorts your favorite snack based on their color, powered by the AWS IoT suite of services | | [Smart Garage](smart-garage) | Control your garage using Alexa using Raspberry Pi | | [Sports Scoring using Machine Learning](sports-scoring-using-machine-learning) | Track and display the score of a Cornhole game using a camera and Amazon SageMaker | diff --git a/reinvent-2019/sign-and-speak/CloudFormation/cf.json b/reinvent-2019/sign-and-speak/CloudFormation/cf.json new file mode 100644 index 00000000..64ff8098 --- /dev/null +++ b/reinvent-2019/sign-and-speak/CloudFormation/cf.json @@ -0,0 +1,661 @@ +{ + "Parameters":{ + "PARAMFFMPEGLayer" : { + "Default" : "", + "Description" : "ARN with version of the FFMPEG Layer", + "Type" : "String" + }, + "PARAMS2SEndpoint" : { + "Default" : "", + "Description" : "Endpoint of the S2S Model on Sagemaker", + "Type" : "String" + } + }, + "Resources": { + "messagestable": { + "Type": "AWS::DynamoDB::Table", + "Properties": { + "TableName": "messages-cf", + "AttributeDefinitions": [ + { + "AttributeName": "msgid", + "AttributeType": "S" + } + ], + "KeySchema": [ + { + "AttributeName": "msgid", + "KeyType": "HASH" + } + 
], + "ProvisionedThroughput": { + "ReadCapacityUnits": 1, + "WriteCapacityUnits": 1 + }, + "StreamSpecification": { + "StreamViewType": "NEW_IMAGE" + } + } + }, + "messagestablestream": { + "Type": "AWS::Lambda::EventSourceMapping", + "DependsOn" : ["messagestable", "sendmessages2swslambda"], + "Properties": { + "BatchSize": 1, + "Enabled": true, + "EventSourceArn": {"Fn::GetAtt": ["messagestable","StreamArn"]}, + "FunctionName": {"Fn::GetAtt": ["sendmessages2swslambda","Arn"]}, + "StartingPosition": "LATEST" + } + }, + "wsclientstable": { + "Type": "AWS::DynamoDB::Table", + "Properties": { + "TableName": "ws-clients-cf", + "AttributeDefinitions": [ + { + "AttributeName": "connectionId", + "AttributeType": "S" + } + ], + "KeySchema": [ + { + "AttributeName": "connectionId", + "KeyType": "HASH" + } + ], + "ProvisionedThroughput": { + "ReadCapacityUnits": 1, + "WriteCapacityUnits": 1 + } + } + }, + "S3Bucket" : { + "DependsOn" : ["videotogridlambda","infersignlambda"], + "Type" : "AWS::S3::Bucket", + "Properties" : { + "BucketName" : "signs-data-cf", + "CorsConfiguration": { + "CorsRules": [ + { + "AllowedHeaders": [ + "*" + ], + "AllowedMethods": [ + "GET","PUT","POST" + ], + "AllowedOrigins": [ + "*" + ], + "Id": "myCORSRuleId1", + "MaxAge": "3600" + } + ] + } + } + }, + "IAMS2SExecutionRole" : { + "Type" : "AWS::IAM::Role", + "Properties" : { + "RoleName" : {"Fn::Join": ["", [{"Ref": "AWS::Region"}, "s2s-iam-role-cf"]]}, + "AssumeRolePolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": [ + "lambda.amazonaws.com" + ] + }, + "Action": [ + "sts:AssumeRole" + ] + } + ] + }, + "Policies" : [ + { + "PolicyName": "s2s-iam-policy-cf", + "PolicyDocument" : { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "sagemakerPolicies", + "Effect": "Allow", + "Action": [ + "sagemaker:InvokeEndpoint" + ], + "Resource": { + "Fn::Join":[ + "", + ["arn:aws:sagemaker:*:*:endpoint/",{"Ref" : "PARAMS2SEndpoint"}] + ] 
+ } + }, + { + "Sid": "OtherPolicies", + "Effect": "Allow", + "Action": [ + + "execute-api:ManageConnections" + ], + "Resource": "arn:aws:execute-api:*:*:s2s-ws-cf/Prod/*/*" + }, + { + "Sid": "S3Policy", + "Effect": "Allow", + "Action": [ + "s3:GetObjectTagging", + "s3:PutObjectTagging", + "s3:PutObjectAcl", + "s3:ListBucket", + "s3:GetObject*", + "s3:PutObject", + "s3:GetObject" + ], + "Resource": "arn:aws:s3:::signs-data-cf/*" + }, + { + "Sid": "DynamoDBPolicy", + "Effect": "Allow", + "Action": [ + "dynamodb:GetShardIterator", + "dynamodb:GetRecords", + "dynamodb:DescribeStream", + "dynamodb:GetItem", + "dynamodb:Query", + "dynamodb:Scan", + "dynamodb:DeleteItem", + "dynamodb:PutItem", + "dynamodb:ListStreams" + ], + "Resource": [ + "arn:aws:dynamodb:*:*:table/messages-cf", + "arn:aws:dynamodb:*:*:table/messages-cf/stream/*", + "arn:aws:dynamodb:*:*:table/ws-clients-cf" + ] + }, + { + "Sid": "AllowLogsPolicy", + "Effect": "Allow", + "Action": [ + "logs:CreateLogGroup", + "logs:DescribeLogStreams", + "logs:PutLogEvents", + "logs:CreateLogStream" + ], + "Resource": "arn:aws:logs:*:*:*" + } + ] + } + } + ], + "Description" : "Allows Lambda functions to call AWS services on your behalf. 
" + } + }, + "wsconnectlambda" : { + "Type": "AWS::Lambda::Function", + "DependsOn" : ["IAMS2SExecutionRole"], + "Properties": { + "Description" : "Triggered on s2s ui client connecting to api websocket", + "FunctionName" : "s2s-ws-connect-cf", + "Handler": "index.lambda_handler", + "Role": { + "Fn::GetAtt": ["IAMS2SExecutionRole","Arn"] + }, + "Environment" : { + "Variables" : { + "messagestable" : {"Ref" : "messagestable"}, + "wsclientstable" : {"Ref" : "wsclientstable"} + } + }, + "Runtime": "python3.7", + "Timeout": 120, + "Code" : { + "ZipFile": { "Fn::Join": ["\n", [ + "import boto3 ", + "import json ", + "from datetime import datetime, timedelta ", + "import time ", + "import os", + "def lambda_handler(event, context): ", + " msgBody = (event) ", + " reqContext = event[\"requestContext\"] ", + " connectionId = reqContext[\"connectionId\"] ", + " #store to dynamo db ", + " ddbclient = boto3.client(\"dynamodb\") ", + " nowDTTM = datetime.utcnow().strftime(\"%Y-%m-%d %H:%M:%S\") # \"2019-05-22 06:06:42 ", + " epocSec = int(time.time()) ", + " response = ddbclient.put_item( ", + " Item={ ", + " \"connectionId\": { ", + " \"S\": str(connectionId),", + " }, ", + " \"insertdttm\" : { ", + " \"S\": nowDTTM ", + " }, ", + " \"epocSecond\" : { ", + " \"N\" : str(epocSec) ", + " } ", + " },", + " TableName=os.environ[\"wsclientstable\"], ", + " ) ", + " print(response) ", + " return { ", + " \"statusCode\": 200,", + " \"body\": json.dumps(\"Successful connect\")", + " } " + ]]} + } + } + }, + "wsconnectlambdapermission": { + "Type": "AWS::Lambda::Permission", + "DependsOn" : [ "s2swsapi" , "wsconnectlambda" ] , + "Properties": { + "FunctionName": { "Fn::GetAtt": [ "wsconnectlambda", "Arn" ] }, + "Action": "lambda:InvokeFunction", + "Principal": "apigateway.amazonaws.com", + "SourceArn": { + "Fn::Join": [ + "", + [ + "arn:aws:execute-api:", + { "Ref": "AWS::Region" }, + ":", + { "Ref": "AWS::AccountId" }, + ":", + { "Ref": "s2swsapi" }, + "/*/*"] + ] + } + + } + + }, + 
"s2swsapi": { + "Type": "AWS::ApiGatewayV2::Api", + "Properties": { + "Name": "s2s-ws-cf", + "ProtocolType": "WEBSOCKET", + "RouteSelectionExpression": "$request.body.action" + } + }, + "s2swsapiconnectintegration" : + { + "Type" : "AWS::ApiGatewayV2::Integration", + "DependsOn" : ["wsconnectlambda","s2swsapi"], + "Properties" : { + "ApiId" : { + "Ref": "s2swsapi" + }, + "Description" : "s2s Web Socket Connect API Integration", + "IntegrationType" : "AWS_PROXY", + "IntegrationUri" : { + "Fn::Join": [ + "", + [ + "arn:aws:apigateway:", + { + "Ref": "AWS::Region" + }, + ":lambda:path/2015-03-31/functions/", + { + "Fn::GetAtt" : [ "wsconnectlambda", "Arn" ] + }, + "/invocations" + ] + ] + } + } + }, + "s2swsconnectroute" : { + "Type" : "AWS::ApiGatewayV2::Route", + "DependsOn" : [ + "s2swsapi", "s2swsapiconnectintegration" + ], + "Properties" :{ + "ApiId": { + "Ref" : "s2swsapi" + }, + "RouteKey": "$connect", + "AuthorizationType": "NONE", + "OperationName": "ConnectRoute", + "Target": { + "Fn::Join": [ + "/", + [ + "integrations", + { + "Ref": "s2swsapiconnectintegration" + } + ] + ] + } + } + }, + "s2swsdeployment" : { + "Type": "AWS::ApiGatewayV2::Deployment", + "DependsOn": [ + "s2swsconnectroute", "s2swsdisconnectroute" + ], + "Properties": { + "Description": "Deployment of S2S WS API", + "ApiId": { + "Ref": "s2swsapi" + } + } + }, + "s2swsprodstage": { + "Type": "AWS::ApiGatewayV2::Stage", + "DependsOn": [ + "s2swsdeployment" + ], + "Properties": { + "StageName": "Prod", + "Description": "Prod Stage", + "DeploymentId": { + "Ref": "s2swsdeployment" + }, + "ApiId": { + "Ref": "s2swsapi" + } + } + }, + "wsdisconnectlambda" : { + "Type": "AWS::Lambda::Function", + "DependsOn" : ["IAMS2SExecutionRole"], + "Properties": { + "Description" : "Triggered on s2s ui client disconnecting to api websocket", + "FunctionName" : "s2s-ws-disconnect-cf", + "Handler": "index.lambda_handler", + "Role": { + "Fn::GetAtt": ["IAMS2SExecutionRole","Arn"] + }, + "Environment" : { + 
"Variables" : { + "messagestable" : {"Ref" : "messagestable"}, + "wsclientstable" : {"Ref" : "wsclientstable"} + } + }, + "Runtime": "python3.7", + "Timeout": 120, + "Code" : { + "ZipFile": { "Fn::Join": ["\n", [ + "import json ", + "import boto3", + "import os", + "def lambda_handler(event, context):", + " msgBody = (event)", + " reqContext = event[\"requestContext\"]", + " connectionId = str(reqContext[\"connectionId\"])", + " print(\"connectionId is : \" + connectionId)", + " dynamodb = boto3.resource('dynamodb') ", + " table = dynamodb.Table(os.environ[\"wsclientstable\"]) ", + " response = table.delete_item( ", + " Key={'connectionId' : connectionId} ", + " ) ", + " return {", + " 'statusCode': 200, ", + " 'body': json.dumps('Successful disconnect') ", + " } " + + + ]]} + } + } + }, + "wsdisconnectlambdapermission": { + "Type": "AWS::Lambda::Permission", + "DependsOn" : [ "s2swsapi" , "wsdisconnectlambda" ] , + "Properties": { + "FunctionName": { "Fn::GetAtt": [ "wsdisconnectlambda", "Arn" ] }, + "Action": "lambda:InvokeFunction", + "Principal": "apigateway.amazonaws.com", + "SourceArn": { + "Fn::Join": [ + "", + [ + "arn:aws:execute-api:", + { "Ref": "AWS::Region" }, + ":", + { "Ref": "AWS::AccountId" }, + ":", + { "Ref": "s2swsapi" }, + "/*/*"] + ] + } + } + }, + "s2swsapidisconnectintegration" : + { + "Type" : "AWS::ApiGatewayV2::Integration", + "DependsOn" : ["wsdisconnectlambda","s2swsapi"], + "Properties" : { + "ApiId" : { + "Ref": "s2swsapi" + }, + "Description" : "s2s Web Socket Disconnect API Integration", + "IntegrationType" : "AWS_PROXY", + "IntegrationUri" : { + "Fn::Join": [ + "", + [ + "arn:aws:apigateway:", + { + "Ref": "AWS::Region" + }, + ":lambda:path/2015-03-31/functions/", + { + "Fn::GetAtt" : [ "wsdisconnectlambda", "Arn" ] + }, + "/invocations" + ] + ] + } + } + }, + "s2swsdisconnectroute" : { + "Type" : "AWS::ApiGatewayV2::Route", + "DependsOn" : [ + "s2swsapi", "s2swsapidisconnectintegration" + ], + "Properties" :{ + "ApiId": { + "Ref" 
: "s2swsapi" + }, + "RouteKey": "$disconnect", + "AuthorizationType": "NONE", + "OperationName": "DisconnectRoute", + "Target": { + "Fn::Join": [ + "/", + [ + "integrations", + { + "Ref": "s2swsapidisconnectintegration" + } + ] + ] + } + } + }, + "s2sclientuser" : { + "Type" : "AWS::IAM::User", + "Properties" : { + "UserName" : "s2sclientuser", + "Policies" : [ + { + "PolicyName": "s2s-clientui", + "PolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "PutToS3SignBucket", + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:PutObjectTagging", + "s3:PutObjectAcl" + ], + "Resource": [ + "arn:aws:s3:::signs-data-cf/*" + ] + } + ] + } + } + ] + } + }, + "s2siampolicyclientapp" : { + "Type": "AWS::IAM::Policy", + "DependsOn" : "IAMS2SExecutionRole", + "Properties": { + "PolicyName": "s2s-clientui", + "PolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "PutToS3SignBucket", + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:PutObjectTagging", + "s3:PutObjectAcl" + ], + "Resource": [ + "arn:aws:s3:::signs-data-cf/*" + ] + } + ] + }, + "Roles" : [{ + "Ref": "IAMS2SExecutionRole" + }] + } + }, + "videotogridlambda" : { + "Type": "AWS::Lambda::Function", + "DependsOn" : ["IAMS2SExecutionRole"], + "Properties": { + "Description" : "Convert video to grid image", + "FunctionName" : "python-video-to-grid-shell-cf", + "Handler": "index.lambda_handler", + "MemorySize" : "2048", + "Environment" : { + "Variables" : { + "messagestable" : {"Ref" : "messagestable"}, + "wsclientstable" : {"Ref" : "wsclientstable"} + } + }, + "Layers" : [{"Ref" : "PARAMFFMPEGLayer"}], + "Role": { + "Fn::GetAtt": ["IAMS2SExecutionRole","Arn"] + }, + "Runtime": "python3.7", + "Timeout": 300, + "Code" : { + "ZipFile": { "Fn::Join": ["\n", [ + "#Upload all code files from lambda/python-video-to-grid-shell folder" + ]]} + } + } + }, + "infersignlambda" : { + "Type": "AWS::Lambda::Function", + "DependsOn" : ["IAMS2SExecutionRole"], + "Properties": { 
+ "Description" : "Infer sign from Grid Image by calling sagemaker endpoint", + "FunctionName" : "inferSign-cf", + "Handler": "index.lambda_handler", + "Role": { + "Fn::GetAtt": ["IAMS2SExecutionRole","Arn"] + }, + "Runtime": "python3.7", + "Timeout": 300, + "Environment" : { + "Variables" : { + "messagestable" : {"Ref" : "messagestable"}, + "wsclientstable" : {"Ref" : "wsclientstable"}, + "s2smodelendpoint" : {"Ref" : "PARAMS2SEndpoint"} + } + }, + "Code" : { + "ZipFile": { "Fn::Join": ["", [ + "#Upload all code files from lambda/infersign folder" + ]]} + } + } + }, + "sendmessages2swslambda" : { + "Type": "AWS::Lambda::Function", + "DependsOn" : ["IAMS2SExecutionRole"], + "Properties": { + "Description" : "Update S2S Client UI with inferred sign message", + "FunctionName" : "s2s-ws-sendmessage-node-cf", + "Handler": "index.handler", + "Role": { + "Fn::GetAtt": ["IAMS2SExecutionRole","Arn"] + }, + "Runtime": "nodejs12.x", + "Timeout": 300, + "Environment" : { + "Variables" : { + "messagestable" : {"Ref" : "messagestable"}, + "wsclientstable" : {"Ref" : "wsclientstable"}, + "wssurl" : { + "Fn::Join" : [ + "", + [ + {"Ref" : "s2swsapi" }, + ".execute-api.", + {"Ref" : "AWS::Region"}, + ".amazonaws.com/", + {"Ref" : "s2swsprodstage"} + ] + ] + } + } + }, + "Code" : { + "ZipFile": { "Fn::Join": ["", [ + "//Upload all code files from lambda/s2s-ws-sendmessage-node" + ]]} + } + } + } + + }, + "Outputs" : { + "S2SWebSocketURL" : { + "Description": "The WSS Protocol URI to connect to for S2S UI Client", + "Value" : { + "Fn::Join" : [ + "", + [ + "wss://", + {"Ref" : "s2swsapi" }, + ".execute-api.", + {"Ref" : "AWS::Region"}, + ".amazonaws.com/", + {"Ref" : "s2swsprodstage"} + ] + ] + } + }, + "ARNTableMessages" : { + "Description": "Arn of DynamoDB table MESSAGES to store all signs", + "Value" : {"Fn::GetAtt" : [ "messagestable", "Arn" ]} + }, + "ARNTableWsClients" : { + "Description": "Arn of DynamoDB table WS-CLIENTS to store client information", + "Value" : 
{"Fn::GetAtt" : [ "wsclientstable", "Arn" ]} + }, + "ARNS3Bucket" : { + "Description": "Arn of S3 Bucket SIGNS-DATA to store sign information", + "Value" : {"Fn::GetAtt" : [ "S3Bucket", "Arn" ]} + } + } +} \ No newline at end of file diff --git a/reinvent-2019/sign-and-speak/LICENSE b/reinvent-2019/sign-and-speak/LICENSE new file mode 100644 index 00000000..67db8588 --- /dev/null +++ b/reinvent-2019/sign-and-speak/LICENSE @@ -0,0 +1,175 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. diff --git a/reinvent-2019/sign-and-speak/README.md b/reinvent-2019/sign-and-speak/README.md new file mode 100644 index 00000000..15578a61 --- /dev/null +++ b/reinvent-2019/sign-and-speak/README.md @@ -0,0 +1,295 @@ +# Sign & Speak - AI-powered Communication + +

+ +## 1. Project Overview + +This repository provides the resources and instructions required to create your own version of the Sign & Speak project, which was on display at the Builder's Fair during re:Invent 2019. Sign & Speak uses machine learning to build a communication tool for users of sign language and users of spoken language. + +## 2. Project Abstract + +Sign & Speak facilitates communication between users of spoken language and users of sign language. By applying AI models trained to transcribe speech and interpret sign language, combined with a camera and a microphone, the tool enables two-way conversation in situations where communication was previously challenging. + +## 3. Participant Experience + +The Sign & Speak demo allows two participants to complete two scripted conversations, where one participant uses Auslan (Australian sign language) and one participant uses English. The Auslan user stands in front of a webcam, with a white background behind them, and is shown the 'Sign' page of the UI. The English user stands in front of a directional microphone, and is shown the 'Speak' page of the UI. Both UI components allow the participants to record their interaction through start/stop buttons. + +The table below shows the two scripted conversations supported in the demo, where *A* is the Auslan user and *E* is the English user. + +| Conversation #1 | Conversation #2 | +| ------------- | ------------- | +| **A:** Hello. | **A:** Pleased to meet you. | +| **E:** Hi! How are you? | **E:** Likewise. How are you? | +| **A:** Good. How are you? | **A:** Good. How are you? | +| **E:** I'm doing well. What are you planning tonight? | **E:** I'm doing well. What are you up to tonight? | +| **A:** Going to the pub. | **A:** Going to a restaurant. | +| **E:** Oh cool, I'd love to join you. What time are you going? | **E:** Sounds great, I'd love to join you. At what time are you going? | +| **A:** At 20:00 | **A:** At 20:00 | +| **E:** See you there! 
| **E:** See you there! | +| **A:** Goodbye | **A:** Goodbye | + +In addition to the two-way conversation, the demo allows for individual participants to test the Auslan transcription model separately. When testing the Auslan model, participants can choose from the following list of supported words and phrases: + +* Cat +* Friend +* Grandfather +* Grandmother +* Hello +* Goodbye +* Pleased to meet you +* Good! How are you? +* Thank you +* Eight o'clock +* Restaurant +* Pub + +## 4. Architecture + +The image below shows the full architecture for the two-way communication demo. + +### Sign Flow ### +1. A video recording is made of the Auslan user signing a word or phrase. This video is uploaded to an Amazon S3 bucket. +1. The video upload triggers an AWS Lambda function which transforms the video into an image (a grid of frames). +1. A second AWS Lambda function sends the image to an Amazon SageMaker inference endpoint and waits for the response. It stores the resulting message in an Amazon DynamoDB table. + + +### Speak Flow +1. A stream of spoken words is sent to Amazon Transcribe to get a transcript. +1. This transcript is stored in the DynamoDB table as a "Speech" message by a Lambda function + +### UI Flow +- When a new Sign UI/Speak UI instance is started up, it makes a persistent connection to the WebSocket API on the API Gateway +- Changes to the DynamoDB table are captured via a DynamoDB Stream and the inserted message is published to the API Gateway endpoint using another Lambda function +- The WebSocket API on the API Gateway publishes the message back to the Sign UI/Speak UI, which displays the formatted message + +

+ +## 5. User Guide + +This section describes how to set up the project on your own AWS account. + +### 5.1 Hardware and Equipment + +Below is a list of hardware and equipment used during the demo, in combination with the laptop running the demo itself. + +* Webcam with USB connector +* Directional microphone with USB connector +* White canvas background + stand +* Height-adjustable tripods for webcam and microphone +* Additional monitor (*optional*) +* Softbox lighting kit (*optional*) + +### 5.2 Machine Learning Model + +The sign language machine learning model is created using [PyTorch](https://pytorch.org/) in [Amazon SageMaker](https://aws.amazon.com/sagemaker/). This section describes the process of training a new model from scratch. + +#### 5.2.1 Creating a data set + +First, you need to decide on a set of words and short phrases which the demo should support. We used the list of 12 words and phrases listed in section 3. The model performs better on signs which are visually distinct, but with enough training data, it can distinguish between similar signs such as grandfather and grandmother. + +Second, you need to determine the audience for your demo. At re:Invent, we expected to see adult participants, with no prior knowledge of Auslan, and various nationalities, genders, clothing styles, and other visual features. To create a robust model for our expected audience, we asked 64 colleagues from different AWS offices to help us create training data. + +We controlled for factors such as background and lighting by choosing to only support a white background with even lighting. After completing the recording sessions with volunteers, and discarding unsuitable recordings, we were left with 42-72 videos per word or phrase. + +#### 5.2.2. Video preprocessing + +Each video recording of a word or phrase is transformed into an image representation for the purpose of performing image classification with the machine learning model. 
This processing is done through a combination of Bash and Python scripts executed by AWS Lambda. This section explains how the preprocessing generates an image from a video, and describes how to set up your own AWS Lambda function to support the process. + +To capture the movement (or time) element of signing a word or phrase, the image representation of a video is created as a 3x3 grid of video frames. [FFmpeg](https://ffmpeg.org/) is used to extract the key (non-blurry) frames from the video, then a Python script selects 9 key frames evenly spread across the length of the video, and FFmpeg is used to arrange these frames into the final grid structure. By selecting the frames according to the length of the video, this method is more robust to different speeds of signing. The image below illustrates the concept (blurred for anonymization only). + +

+ +#### 5.2.3 Training and deploying a model + +First, ensure that all training videos have been preprocessed into 3x3 grid images. Upload these images to an Amazon S3 bucket, organizing the images into folders based on their label (e.g. a folder for 'cat', a folder for 'pub', etc). + +Follow [these instructions](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-setup-working-env.html) to set up an Amazon SageMaker instance on the smallest instance type (`ml.t3.medium`). If you want to pre-load the Sign & Speak scripts, simply add the URL of this GitHub repository in the 'Git repositories' section of the setup process. + +If you forgot to pre-load the Sign & Speak project, simply wait for the instance to show status `InService`, then click 'Open Jupyter'. In the new tab which opens, click on the 'New' drop-down menu in the top right corner, and choose 'Terminal' at the bottom of the list. From the terminal, you can `git clone` this repository onto the instance. + +### 5.3 Sign & Speak User Interface + +There are two key screens in the UI - a Sign UI and a Speak UI. Each of them is meant to allow the user to interact with the demo and get a 2-way conversation going. + +Each UI supports four key functions: + +- **On load** : Connect to WebSocket API +- **On unload** : Disconnect from WebSocket API +- **Receive Message** : On receiving a message from the WebSocket API, update the Text Chat panel +- **Capture Intent** : Capture the intent of the user (e.g. 
capture user video in a Sign UI / capture spoken words from Speak UI) + +#### 5.3.1 Sign UI +The UI has two panels - a Sign Panel and a Message Panel + +**Sign Panel** +- Start Sign Button > Starts recording the sign being performed by the user +- Stop Sign Button > Stops recording +- Upload Sign Button > Uploads the recorded video to S3 + +**Message Panel** +- Receive Message > Format the message and display in the Message Panel with a caption "Auslan" + +#### 5.3.2 Speak UI +**Speak Panel** +- Start Speaking > Starts capturing spoken words and streams them to the Amazon Transcribe endpoint +- Stop Speaking > Stops capturing spoken words and stops streaming to the Amazon Transcribe endpoint +- Send Message > Sends the transcript of spoken text to an API Gateway endpoint, which saves it to the DynamoDB table using a Lambda function + +**Message Panel** +- Receive Message > Format the message and display in the Message Panel with a caption "English" + +**NOTE** : We have not uploaded the code for the Speak UI; it follows the same behaviour seen at [aws-samples/amazon-transcribe-websocket-static](https://github.com/aws-samples/amazon-transcribe-websocket-static) + +## 6. Setup Instructions + +### 6.0 Pre-Setup + +**Setup the FFMPEG Lambda Layer** + +The stable release of FFmpeg at time of writing (4.2.1) does not contain all the features required to complete the preprocessing. We recommend downloading a [nightly build](https://johnvansickle.com/ffmpeg/) to access the latest features and bug fixes. We used the build from 26/08/2019 but would expect any later build or release to support the required functionality. To run the correct version of FFmpeg, you need to upload it as an [AWS Lambda layer](https://docs.aws.amazon.com/lambda/latest/dg/configuration-layers.html). Keep in mind the [size limits](https://docs.aws.amazon.com/lambda/latest/dg/limits.html) for AWS Lambda - only the main ffmpeg binary needs to be included in the ZIP package uploaded as a Lambda layer. 
Detailed instructions are as follows: + +1. Download the `ffmpeg-git-amd64-static.tar.xz` from the [nightly build](https://johnvansickle.com/ffmpeg/) +1. Untar the tar file on your local machine +1. Create a zip package containing the `ffmpeg` binary file. Name the zip package however you like, but make sure the `ffmpeg` file is at the top level of the package +1. Navigate to AWS Lambda and upload the zip package as a layer +1. Make a note of the layer version ARN (the ARN including the version number) + +**Setup the Machine Learning model for inference** + +Follow the instructions in `scripts/ML Instructions.ipynb` to train and deploy a model with your training data. Once you have an Amazon SageMaker endpoint, make a note of the endpoint name, and follow the instructions below to connect it to the UI. + +### 6.1 Installation + +After downloading the code, follow the steps below to set up the application + +**[AWS] Setup the AWS Resources** +- Run the CloudFormation template at /CloudFormation/cf.json. (Note: This template has been tested in AWS Sydney region only) + - Give the name of the stack as "S2SAPP" + - Provide the parameters requested + - PARAMFFMPEGLayer : ARN with version of the FFMPEG Layer + - PARAMS2SEndpoint : Name of the S2S Model endpoint on SageMaker + +**[AWS] IAM User with AccessKey** +- Go to IAM User "s2sclientuser" in IAM Console + - Click on the tab "Security Credentials" + - Click on "Create Access Key" + - Copy and Store the Access key ID/Secret access key, securely + +**[AWS] Lambda Functions : Update latest code** +- Copy the updated code for lambda functions. 
Create the files as necessary + - infersign-cf ( index.py ) + - python-video-to-grid-shell-cf( index.py , frame_picker.py , testscript.sh , video_to_grid.sh ) + - s2s-ws-connect-cf( index.py ) + - s2s-ws-disconnect-cf( index.py ) + - s2s-ws-sendmessage-node-cf ( index.js ) + +**[AWS] Update Lambda Trigger : python-video-to-grid-shell-cf** +- Open the Lambda Function python-video-to-grid-shell-cf + - Click on Runtime dropdown, and select "Python3.8" + - Click on Add Trigger + - Trigger Configuration: select S3 + - Bucket: signs-data-cf + - Event Type : "All object create events" + - Prefix : "02_app/upload/" + - Ensure "Enable Trigger" is checked + - Click Add + +**[AWS] Update Lambda Trigger : infersign-cf** +- Open the Lambda Function infersign-cf + - Click on Add Trigger + - Trigger Configuration: select S3 + - Bucket: signs-data-cf + - Event Type : "All object create events" + - Prefix : "02_app/grid-image/" + - Ensure "Enable Trigger" is checked + - Click Add + +**[LocalMachine] S2S Client UI** +- Copy the ui folder to a local directory + +**[LocalMachine] Update [LocalDir]\ui\static\js\sign.js** +- Update the app_credentials + - Put the AccessKeyID/SecretAccessKey in app_credentials +- Set the region + - Update the variable "app_region" based on the AWS Region used + +**[LocalMachine] Update [LocalDir]\ui\static\js\web-socket-msg.js** +- Update the WS URL + - Put the CloudFormation Template Output value for S2SWebSocketURL in "wsurl" variable + +### 6.3 Run Application +*Use Firefox (We have tested it only on this browser)* +- Navigate to the page \ui\sign.html +- Click on Start Sign/Stop Sign to record a sign +- Click on Upload Sign to trigger the process of inference +- Inferred sign message is displayed on the UI +- If you get an alert message "WebSocket connection is closed. Refresh screen!!!", then reload your UI. 
+ +### 6.4 Uninstall + +**[AWS] Before Deleting the CloudFormation Stack** +- Make sure the S3 Bucket "signs-data-cf" is empty before deleting the Stack, otherwise the stack deletion will fail. + - Delete the CloudFormation stack - "S2SAPP" + +## 7. FAQ + +**Q: There is more than one sign language?** + +**A:** Yes! By some estimates there are perhaps [300 sign languages](https://en.wikipedia.org/wiki/List_of_sign_languages). Although ASL (American Sign Language) is probably the most well-known of these languages, the Sign & Speak project was built to support [Auslan](https://en.wikipedia.org/wiki/Auslan) (Australian Sign Language). + +**Q: Will this method work for sign languages other than Auslan?** + +**A:** We believe our method can be applied to any sign language. All you need is the training data to train a new model for the sign language of your choice. We describe our approach for collecting training data in the User Guide section of this document. + +**Q: Can you share your Auslan data set and/or model?** + +**A:** To protect the privacy of the volunteers who helped us build our Auslan model, we will not release the data or the model. However, with the code made available in this repository, you can train a new model on your own data. + +**Q: What are the limitations of this method?** + +**A:** The method only works for individual signs, or short combinations of signs (e.g. 'pleased to meet you' consists of three signs). Due to the limit of 9 frames it will not support full sentences. Additionally, the demo performed well with 12 different labels, but would require significantly more training data to scale to larger numbers of supported labels. Finally, this method does not capture all the nuances of sign language, such as expression and context. + +**Q: What platform has the UI been tested on?** + +**A:** The UI has been tested on Windows 10 with the Mozilla Firefox browser. It has been tested in the AWS Sydney Region. 
+ +**Q:** I only see a rotating circle on the UI! Help! + +**A:** Check the following steps have been performed correctly: +- Verify that all the lambda function code has been updated correctly +- Verify that the S3 Bucket triggers for inferSign-cf and python-video-to-grid-shell-cf are created +- Verify that you have changed the runtime language of lambda function python-video-to-grid-shell-cf to Python 3.8 +- Check CloudWatch logs for the lambda functions for errors + +**Q: What are the future plans for this project?** + +**A:** There are many ideas for improving and extending this project; below is a short, but incomplete list. +* Add support for full sign language sentences +* Add support for continuous sign language recognition +* Add a 3D avatar to turn text into sign language +* Improve the security of the application (e.g. Build in Authentication for UI and APIs) + +**Q: What is the animal in your logo?** + +**A:** It's a [quokka](https://duckduckgo.com/?q=quokka&t=ffnt&atb=v176-1&iax=images&ia=images), a marsupial found only in Australia. We are not professional artists. ;) + +## 8. Authors + +Sara 'Moose' van de Moosdijk, AWS ([GitHub](https://github.com/moose-in-australia/) | [LinkedIn](https://www.linkedin.com/in/saravandemoosdijk/)) + +Eshaan Anand, AWS ([GitHub](https://github.com/ea-coder) | [LinkedIn](https://sg.linkedin.com/in/eshaan-anand)) + +## 9. License + +This library is licensed under the Apache 2.0 License. + +## 10. 
References + +This project references the following libraries to put together the solution: + +- [jquery-1.12.4](https://jquery.com/) +- [aws-sdk.min.js](https://cdnjs.cloudflare.com/ajax/libs/aws-sdk/2.610.0/aws-sdk.min.js) +- [bootstrap](https://getbootstrap.com/) +- [RecordRTC.js](https://github.com/muaz-khan/RecordRTC) +- [adapter-latest.js](https://github.com/webrtc/adapter) +- [ffmpeg](https://ffmpeg.org/) \ No newline at end of file diff --git a/reinvent-2019/sign-and-speak/img/grid_concept.png b/reinvent-2019/sign-and-speak/img/grid_concept.png new file mode 100644 index 00000000..bd556e4f Binary files /dev/null and b/reinvent-2019/sign-and-speak/img/grid_concept.png differ diff --git a/reinvent-2019/sign-and-speak/img/sign-and-speak-architecture.png b/reinvent-2019/sign-and-speak/img/sign-and-speak-architecture.png new file mode 100644 index 00000000..c46bce1a Binary files /dev/null and b/reinvent-2019/sign-and-speak/img/sign-and-speak-architecture.png differ diff --git a/reinvent-2019/sign-and-speak/img/sign-and-speak-logo-small.png b/reinvent-2019/sign-and-speak/img/sign-and-speak-logo-small.png new file mode 100644 index 00000000..e9daf5a1 Binary files /dev/null and b/reinvent-2019/sign-and-speak/img/sign-and-speak-logo-small.png differ diff --git a/reinvent-2019/sign-and-speak/lambda/inferSign-cf/index.py b/reinvent-2019/sign-and-speak/lambda/inferSign-cf/index.py new file mode 100644 index 00000000..0a69422f --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/inferSign-cf/index.py @@ -0,0 +1,74 @@ +import boto3 +import json +import uuid +from datetime import datetime, timedelta +import time +import os + +s2smodelendpoint = os.environ["s2smodelendpoint"] +messagestable = os.environ["messagestable"] + +def lambda_handler(event, context): + + recordInfo = event["Records"][0] + + s3Info = recordInfo["s3"] + + bucketInfo = s3Info["bucket"] + + bucketName = bucketInfo["name"] + + objectInfo = s3Info ["object"] + + key = objectInfo["key"] + print ("key : 
" + key) + + files3url = "s3://" + bucketName + "/" + key + print("files3url is " + files3url) + + client = boto3.client('sagemaker-runtime') + + response = client.invoke_endpoint( + EndpointName=s2smodelendpoint, #'sagemaker-pytorch-2019-11-22-23-31-53-466', + Body=json.dumps({'grid': files3url}), + ContentType='application/json' + ) + prediction = json.loads(response['Body'].read()) + prediction_label = prediction['output'] + prediction_confidence = prediction['confidence'] + print("prediction_label is " + prediction_label) + print("prediction_confidence is " + str(prediction_confidence)) + + #print(prediction) + + #store to dynamo db + ddbclient = boto3.client('dynamodb') + nowDTTM = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') # '2019-05-22 06:06:42 + epocSec = int(time.time()) + + response = ddbclient.put_item( + Item={ + 'msgid': { + 'S': str(uuid.uuid1()), + }, + 'msg': { + 'S': prediction_label, + }, + 'confidence': { + 'S': str(prediction_confidence), + }, + 'isSign' : { + 'BOOL' : True + }, + 'insertdttm' : { + 'S': nowDTTM + }, + 'epocSecond' : { + 'N' : str(epocSec) + } + }, + TableName=messagestable, + ) + + print(response) + diff --git a/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/frame_picker.py b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/frame_picker.py new file mode 100644 index 00000000..58369f46 --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/frame_picker.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 + +import argparse +import sys +import glob +from math import ceil + + +def total_frame_count(dir): + """ + Returns the number of frames found in a directory. + """ + frame_files = glob.glob("{}/frame_*.png".format(dir)) + num_files = len(frame_files) + return num_files + + +def choose_frames(dir): + """ + Picks n evenly spaced frames from m frames. + Returns indices of chosen frames. 
+ """ + NUM_GRID_FRAMES = 9 + total_frames = total_frame_count(dir) + grid_frame_indices = [] + for index in range(1, NUM_GRID_FRAMES + 1): + grid_frame_indices.append(int(ceil(index * total_frames / NUM_GRID_FRAMES))) + # Format as a string which can be easily parsed by a bash script + indices = "|".join(str(item) for item in grid_frame_indices) + return str(indices) + + +if __name__ == "__main__": + # Parse input + parser = argparse.ArgumentParser() + parser.add_argument("dir", help="directory containing the video frames") + args = parser.parse_args() + frame_indices = choose_frames(args.dir) + sys.exit(frame_indices) diff --git a/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/index.py b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/index.py new file mode 100644 index 00000000..ef6068d4 --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/index.py @@ -0,0 +1,80 @@ +import json +import os +import boto3 + + +def lambda_handler(event, context): + # TODO implement + + recordInfo = event["Records"][0] + #print ('records info : ') + #print (recordInfo) + + s3Info = recordInfo["s3"] + #print ('s3event info : ') + #print(s3Info) + + bucketInfo = s3Info["bucket"] + #print ('bucket info : ') + #print (bucketInfo) + + bucketName = bucketInfo["name"] + print ('bucket Name : ' + bucketName) + #print (bucketName) + + objectInfo = s3Info ["object"] + #print ('objectInfo : ') + #print (objectInfo) + + + + key = objectInfo["key"] + print ("key : " + key) + #print (key ) 02_app/upload/hello.mp4 + + split = key.split('/') + filename = split[2] + print (filename) + + + + s3 = boto3.client('s3') + ##reinvent-signs-data/02_app/upload/hello.mp4 + + + #Download the video to /tmp folder + s3.download_file(bucketName, key, '/tmp/' + filename) + + # Copy all the scripts over to /tmp + os.system('cp testscript.sh /tmp/testscript.sh') + os.system('cp video_to_grid.sh /tmp/video_to_grid.sh') + os.system('cp 
frame_picker.py /tmp/frame_picker.py') + os.system('cp /opt/ffmpeg /tmp/ffmpeg') + os.system('echo copy done') + + + cmd = '/usr/bin/bash /tmp/testscript.sh '+ filename + os.system(cmd) + + stream = os.popen('echo Returned output') + output = stream.read() + print(output) + + + + #hello_grid.png + #/02_app/grid-image/lambda_grid.png + + split = filename.split('.') + filenameonlywithoutextension = split[0] + print('ready to upload grid file : ' + filenameonlywithoutextension) + + response = s3.upload_file('/tmp/' + filenameonlywithoutextension + '_grid.png', bucketName, '02_app/grid-image/' + filenameonlywithoutextension + '_grid.png') + + print('Uploaded grid image success') + + + return { + 'statusCode': 200, + 'body': json.dumps('Hello from Lambda!') + } diff --git a/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/testscript.sh b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/testscript.sh new file mode 100644 index 00000000..31278581 --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/testscript.sh @@ -0,0 +1,19 @@ +echo "hello from script" + +echo "param" +echo $1 + +echo "-----------------------" +chmod 755 /tmp/ffmpeg +echo "cd /tmp" +cd /tmp +pwd +ls -lrt +echo "-----------------------" +echo "conversion started" +/usr/bin/bash /tmp/video_to_grid.sh $1 /tmp/ +echo "conversion done" +echo "-----------------------" +cd /tmp +ls -lrt +echo "script done" \ No newline at end of file diff --git a/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/video_to_grid.sh b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/video_to_grid.sh new file mode 100644 index 00000000..34dc5190 --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/python-video-to-grid-shell-cf/video_to_grid.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +###################### +# This script turns a video file into a grid image file. 
+# Example: +# ./video_to_grid.sh -i input-video.mp4 ./out-dir +###################### + +pwd + +# Retrieve command line input +input_video=$1 +output_dir=$2 +#mkdir $output_dir + +# Use a temporary directory to save files between steps +temp_dir="/tmp/wip" +mkdir $temp_dir + +# Separate the video file path into its parts +file_name=$(basename -- "$input_video") +extension=${file_name##*.} +video_name=${file_name%.*} + +echo ${input_video} + +# Center crop +if [ $extension == "mp4" ] +then + # The mp4 videos tend to be larger, which center crop needs to take into account + cropped_video="${temp_dir}/cropped.${extension}" + /tmp/ffmpeg -i $input_video -vf crop=700:700 $cropped_video +else + cropped_video="${temp_dir}/cropped.${extension}" + /tmp/ffmpeg -i $input_video -vf crop=480:480 $cropped_video +fi +# Resize +resized_video="${temp_dir}/resized.${extension}" +/tmp/ffmpeg -i $cropped_video -vf scale=76:76 $resized_video +# Split into frames +frame_file="${temp_dir}/frame_%01d.png" +/tmp/ffmpeg -i $resized_video -vf "select=not(mod(n\,6))" -vsync vfr -q:v 2 $frame_file +# Choose 9 evenly spread frames +frame_set=$(python frame_picker.py ${temp_dir} 2>&1 >/dev/null) +frames=(${frame_set//|/ }) +# Combine frames into grid image +grid_file="${output_dir}/${video_name}_grid.png" + +rm $grid_file + +/tmp/ffmpeg -i "${temp_dir}/frame_${frames[0]}.png" -i "${temp_dir}/frame_${frames[1]}.png" -i "${temp_dir}/frame_${frames[2]}.png" -i "${temp_dir}/frame_${frames[3]}.png" -i "${temp_dir}/frame_${frames[4]}.png" -i "${temp_dir}/frame_${frames[5]}.png" -i "${temp_dir}/frame_${frames[6]}.png" -i "${temp_dir}/frame_${frames[7]}.png" -i "${temp_dir}/frame_${frames[8]}.png" -filter_complex "[0:v][1:v][2:v][3:v][4:v][5:v][6:v][7:v][8:v]xstack=inputs=9:layout=w3_0|w3_h0+h2|w3_h0|0_h4|0_0|w3+w1_0|0_h1+h2|w3+w1_h0|w3+w1_h1+h2[v]" -map "[v]" $grid_file + +# Clean up +rm -r $temp_dir \ No newline at end of file diff --git 
a/reinvent-2019/sign-and-speak/lambda/s2s-ws-connect-cf/index.py b/reinvent-2019/sign-and-speak/lambda/s2s-ws-connect-cf/index.py new file mode 100644 index 00000000..0ccfd94f --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/s2s-ws-connect-cf/index.py @@ -0,0 +1,44 @@ +import boto3 +import json +from datetime import datetime, timedelta +import time +import os + +def lambda_handler(event, context): + + print (event) + + msgBody = (event) + reqContext = event["requestContext"] + connectionId = reqContext["connectionId"] + print("connectionId is : " + connectionId) + + + + #store to dynamo db + ddbclient = boto3.client('dynamodb') + nowDTTM = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') # '2019-05-22 06:06:42 + epocSec = int(time.time()) + + response = ddbclient.put_item( + Item={ + 'connectionId': { + 'S': str(connectionId), + }, + 'insertdttm' : { + 'S': nowDTTM + }, + 'epocSecond' : { + 'N' : str(epocSec) + } + }, + TableName=os.environ['wsclientstable'], + ) + + print(response) + + + return { + 'statusCode': 200, + 'body': json.dumps('Successful connect') + } diff --git a/reinvent-2019/sign-and-speak/lambda/s2s-ws-disconnect-cf/index.py b/reinvent-2019/sign-and-speak/lambda/s2s-ws-disconnect-cf/index.py new file mode 100644 index 00000000..b228c1e8 --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/s2s-ws-disconnect-cf/index.py @@ -0,0 +1,25 @@ +import json +import boto3 +import os +def lambda_handler(event, context): + + print (event) + + msgBody = (event) + reqContext = event["requestContext"] + connectionId = str(reqContext["connectionId"]) + print("connectionId is : " + connectionId) + + + dynamodb = boto3.resource('dynamodb') + table = dynamodb.Table(os.environ['wsclientstable']) + + response = table.delete_item( + Key={'connectionId' : connectionId} + ) + + # TODO implement + return { + 'statusCode': 200, + 'body': json.dumps('Successful disconnect') + } diff --git a/reinvent-2019/sign-and-speak/lambda/s2s-ws-sendmessage-node-cf/index.js 
b/reinvent-2019/sign-and-speak/lambda/s2s-ws-sendmessage-node-cf/index.js new file mode 100644 index 00000000..212a50c2 --- /dev/null +++ b/reinvent-2019/sign-and-speak/lambda/s2s-ws-sendmessage-node-cf/index.js @@ -0,0 +1,58 @@ +"use strict"; + +const AWS = require("aws-sdk"); +const wsclientstable = process.env.wsclientstable; +const wssurl = process.env.wssurl; + + +require("aws-sdk/clients/apigatewaymanagementapi"); + +exports.handler = async (event) => { + var record = event["Records"]; + var eventName = record[0]["eventName"]; + if(eventName == "INSERT") + { + var dynamodbItem = record[0]["dynamodb"]; + var newItem = dynamodbItem["NewImage"]; + var sign = newItem["msg"]["S"]; + var confidence = newItem["confidence"]["S"]; + var isSign = newItem["isSign"]["BOOL"]; + var finalMesage = isSign + "|" + confidence + "|" + sign ; + var dynamoClient = new AWS.DynamoDB.DocumentClient(); + let connectionData; + try { + connectionData = await dynamoClient.scan({ TableName: wsclientstable, + ProjectionExpression: 'connectionId' }).promise(); + } + catch (e) { + console.log(e.stack); + return { statusCode: 500, body: e.stack }; + } + const apigwManagementApi = new AWS.ApiGatewayManagementApi({ + apiVersion: '2018-11-29', + endpoint: wssurl + }); + const postCalls = connectionData.Items.map(async ({ connectionId }) => { + try { + await apigwManagementApi.postToConnection({ ConnectionId: connectionId, Data: finalMesage }).promise(); + } catch (e) { + if (e.statusCode === 410) { + console.log('Found stale connection, deleting ${connectionId}'); + await dynamoClient.delete({ TableName: wsclientstable, Key: { connectionId } }).promise(); + } else { + throw e; + } + } + }); + try { + await Promise.all(postCalls); + } catch (e) { + return { statusCode: 500, body: e.stack }; + } + } + const response = { + statusCode: 200, + body: JSON.stringify('DDB Stream Message processed'), + }; + return response; +}; \ No newline at end of file diff --git 
a/reinvent-2019/sign-and-speak/scripts/ML Instructions.ipynb b/reinvent-2019/sign-and-speak/scripts/ML Instructions.ipynb new file mode 100644 index 00000000..0ed560d5 --- /dev/null +++ b/reinvent-2019/sign-and-speak/scripts/ML Instructions.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sign & Speak ML Instructions\n", + "\n", + "This notebook shows how to use Amazon SageMaker to run the training and inference scripts for the Sign & Speak project.\n", + "\n", + "Use the `conda_pytorch_p36` kernel to run the cells in this notebook.\n", + "\n", + "## Training\n", + "\n", + "The following cell defines the training job to be run by Amazon SageMaker. It points to the `grid_train.py` training script, defines the number and types of instances used for training, sets the hyperparameter values, and defines regular expressions which Amazon SageMaker uses to track the training metrics.\n", + "\n", + "Before running this cell, you must provide a descriptive name for the training job and specify the Amazon S3 URI where the output should be stored. The URI should look like `s3://bucket-name/output-folder/`.\n", + "\n", + "*Note: If you are using a new AWS account, you may not have access to p2 instance types yet. The code should run fine on a CPU instance type, but it will require more time to complete. 
Submit a limit increase request to use p2 instances.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sagemaker\n", + "from sagemaker.pytorch import PyTorch\n", + "\n", + "# Replace the following variables with a descriptive name for the\n", + "# training job and an S3 URI where to store the output\n", + "JOB_NAME = 'INSERT_A_NAME_HERE'\n", + "OUTPUT_PATH = 'INSERT_AN_S3_URI_HERE'\n", + "\n", + "role = sagemaker.get_execution_role()\n", + "\n", + "estimator = PyTorch(entry_point='grid_train.py',\n", + " role=role,\n", + " base_job_name=JOB_NAME,\n", + " output_path=OUTPUT_PATH,\n", + " framework_version='1.1.0',\n", + " train_instance_count=1,\n", + " train_instance_type='ml.p2.xlarge',\n", + " hyperparameters={\n", + " \"epochs\": 10,\n", + " \"batch-size\": 4,\n", + " \"gamma\": 0.1,\n", + " \"lr\": 0.001,\n", + " \"momentum\": 0.9,\n", + " \"step-size\": 7\n", + " },\n", + " metric_definitions=[\n", + " {'Name': 'train:loss', 'Regex': 'train Loss: (.*?) '},\n", + " {'Name': 'train:acc', 'Regex': 'train Loss: .*? Acc: (.*?)$'},\n", + " {'Name': 'val:loss', 'Regex': 'val Loss: (.*?) '},\n", + " {'Name': 'val:acc', 'Regex': 'val Loss: .*? Acc: (.*?)$'}\n", + " ]\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the training job has been defined, pass in the Amazon S3 URI for the training data to start the training job. The URI should look like `s3://bucket-name/training-data-folder/`, where `training-data-folder` contains one folder per label containing the training images for that label. \n", + "\n", + "This cell will output the logs of the training job, but you can also view the logs and visualize the metrics in the Amazon SageMaker console." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "estimator.fit({'training': 'INSERT_AN_S3_URI_HERE'})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hyperparameter Tuning\n", + "This section shows how to run a hyperparameter tuning job using Amazon SageMaker. First, define the range of values for the hyperparameters which you want to tune." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter\n", + "\n", + "hyperparameter_ranges = {\n", + " 'batch-size': IntegerParameter(3,30,scaling_type='Auto'),\n", + " 'momentum': ContinuousParameter(0.1, 0.9, scaling_type='Auto'),\n", + " 'step-size': IntegerParameter(3, 12, scaling_type='Auto'),\n", + " 'gamma': ContinuousParameter(0.01, 0.9, scaling_type='Auto')\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, define the training jobs which will be run during hyperparameter tuning. This is the same as in the above section on training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sagemaker\n", + "from sagemaker.pytorch import PyTorch\n", + "\n", + "# Replace the following variables with a descriptive name for the\n", + "# training job and an S3 URI where to store the output\n", + "JOB_NAME = 'INSERT_A_NAME_HERE'\n", + "OUTPUT_PATH = 'INSERT_AN_S3_URI_HERE'\n", + "\n", + "role = sagemaker.get_execution_role()\n", + "\n", + "estimator = PyTorch(entry_point='grid_train.py',\n", + " role=role,\n", + " base_job_name=JOB_NAME,\n", + " output_path=OUTPUT_PATH,\n", + " framework_version='1.1.0',\n", + " train_instance_count=1,\n", + " train_instance_type='ml.p2.xlarge',\n", + " hyperparameters={\n", + " \"epochs\": 20,\n", + " \"lr\": 0.001\n", + " },\n", + " metric_definitions=[\n", + " {'Name': 'train:loss', 'Regex': 'train Loss: (.*?) '},\n", + " {'Name': 'train:acc', 'Regex': 'train Loss: .*? Acc: (.*?)$'},\n", + " {'Name': 'val:loss', 'Regex': 'val Loss: (.*?) '},\n", + " {'Name': 'val:acc', 'Regex': 'val Loss: .*? Acc: (.*?)$'}\n", + " ]\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, define the hyperparameter tuning job based on the defined hyperparameter ranges. Set the objective metric, the maximum number of training jobs, and the maximum number of parallel training jobs. 
\n", + "\n", + "*Note: make sure your AWS account limits allow for the number of parallel training jobs for the instance type defined in the training job.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.tuner import HyperparameterTuner\n", + "\n", + "TUNING_JOB_NAME = 'INSERT_A_NAME_HERE'\n", + "\n", + "tuner = HyperparameterTuner(\n", + " estimator=estimator,\n", + " objective_metric_name='val:acc',\n", + " hyperparameter_ranges=hyperparameter_ranges,\n", + " metric_definitions=[\n", + " {'Name': 'train:loss', 'Regex': 'train Loss: (.*?) '},\n", + " {'Name': 'train:acc', 'Regex': 'train Loss: .*? Acc: (.*?)$'},\n", + " {'Name': 'val:loss', 'Regex': 'val Loss: (.*?) '},\n", + " {'Name': 'val:acc', 'Regex': 'val Loss: .*? Acc: (.*?)$'}\n", + " ],\n", + " strategy='Bayesian',\n", + " objective_type='Maximize',\n", + " max_jobs=30,\n", + " max_parallel_jobs=3,\n", + " base_tuning_job_name=TUNING_JOB_NAME\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the tuning job has been defined, pass in the Amazon S3 URI for the training data to start the tuning job. The URI should look like `s3://bucket-name/training-data-folder/`, where `training-data-folder` contains one folder per label containing the training images for that label.\n", + "\n", + "View the logs and visualize the metrics for the training jobs linked to this tuning job in the Amazon SageMaker console." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tuner.fit(inputs='INSERT_AN_S3_URI_HERE')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploying\n", + "\n", + "After running some training jobs and/or hyperparameter tuning jobs, decide on which training job you want to base your deployment. 
Find the Amazon S3 URI of the model package, which should look like `s3://bucket-name/training-job-name/output/model.tar.gz`. Insert the URI in the code below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.predictor import RealTimePredictor, json_serializer, json_deserializer\n", + "\n", + "class JSONPredictor(RealTimePredictor):\n", + " def __init__(self, endpoint_name, sagemaker_session):\n", + " super(JSONPredictor, self).__init__(endpoint_name, sagemaker_session, json_serializer, json_deserializer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.pytorch import PyTorchModel\n", + "import sagemaker\n", + "\n", + "role = sagemaker.get_execution_role()\n", + "model = PyTorchModel(model_data='INSERT_S3_URI_OF_MODEL_PACKAGE',\n", + " role=role,\n", + " framework_version='1.1.0',\n", + " entry_point='grid_serve.py',\n", + " predictor_cls=JSONPredictor)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After defining the model and predictor type, we specify the number and type of instances for running the endpoint. \n", + "\n", + "*Note: An endpoint takes several minutes to start up.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "predictor = model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once your endpoint is complete, note down the name to link it up to the Sign & Speak user interface." 
# --- Patch residue (kept as comments) -----------------------------------------
# Tail of the preceding notebook JSON:
#   ] } ],
#   "metadata": {
#     "kernelspec": {"display_name": "conda_pytorch_p36", "language": "python",
#                    "name": "conda_pytorch_p36"},
#     "language_info": {"codemirror_mode": {"name": "ipython", "version": 3},
#                       "file_extension": ".py", "mimetype": "text/x-python",
#                       "name": "python", "nbconvert_exporter": "python",
#                       "pygments_lexer": "ipython3", "version": "3.6.5"}
#   },
#   "nbformat": 4, "nbformat_minor": 2 }
#
# diff --git a/reinvent-2019/sign-and-speak/scripts/grid_serve.py b/reinvent-2019/sign-and-speak/scripts/grid_serve.py
# new file mode 100644
# index 00000000..de0af361
# --- /dev/null
# +++ b/reinvent-2019/sign-and-speak/scripts/grid_serve.py
# @@ -0,0 +1,111 @@
# ------------------------------------------------------------------------------
"""Sign & Speak Inference Script

This script defines the methods required by Amazon SageMaker to create an
inference endpoint for the Sign & Speak project. It expects image input,
where each image is a 3x3 grid of video frames, and is stored in an Amazon S3
bucket. The output is the text version of the label and a confidence score.

Input sent to the endpoint should be JSON with the following format:
{'grid': <S3 URI of the grid image, e.g. 's3://bucket-name/object-key'>}

Output returned will be JSON with the following format:
{'output': <text label>,
'confidence': <float between 0 and 1>}
"""

import json
import logging
import os
import tempfile
import re

import boto3
import torch
from torchvision import transforms
from PIL import Image


logger = logging.getLogger(__name__)
JSON_CONTENT_TYPE = 'application/json'
# 's3://<bucket>/<key>': the bucket is everything up to the first '/', the key is the rest.
S3_URI_PATTERN = re.compile(r"s3://([^/]+)/(.+)")
# Define a data transformation similar to the one used to train the original ResNet model
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Label -> index mapping, filled in by model_fn() from class_indices.json.
# Example content:
# {"cat": 0, "eight o clock": 1, "friend": 2, "good how are you": 3, "goodbye": 4,
#  "grandfather": 5, "grandmother": 6, "hello": 7, "pleased to meet you": 8,
#  "pub": 9, "restaurant": 10, "thank you": 11}
classes = {}


def model_fn(model_dir):
    """
    Loads the trained model and the label mapping from the model directory.

    Reads ``model.pth`` (a whole pickled model, not a state dict) and
    ``class_indices.json`` from ``model_dir``. The mapping is stored in the
    module-level ``classes`` dict, which ``output_fn`` reads.

    Returns the model, moved to the selected device and switched to eval mode.
    """
    logger.info('Loading the model.')
    logger.info(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info('Current device: {}'.format(device))

    # The previous code tested `device == "cuda"`. A torch.device object does not
    # compare equal to a plain string, so the CPU branch was always taken.
    # Passing the device as map_location covers both cases in one call.
    model = torch.load(os.path.join(model_dir, 'model.pth'), map_location=device)
    model.to(device).eval()

    logger.info('Loading the classes.')
    global classes
    with open(os.path.join(model_dir, 'class_indices.json'), 'r') as file_handler:
        classes = json.load(file_handler)

    return model


def input_fn(serialized_input_data, content_type=JSON_CONTENT_TYPE):
    """
    Loads the JSON input, fetches the image from S3, and transforms the image.

    ``serialized_input_data`` must be a JSON document with a ``grid`` key that
    holds an ``s3://bucket/key`` URI. Returns the transformed image with a
    leading batch dimension added.

    Raises:
        Exception: if ``content_type`` is not ``application/json``.
        ValueError: if the ``grid`` value is not an ``s3://bucket/key`` URI.
    """
    logger.info('Deserializing the input data.')
    if content_type != JSON_CONTENT_TYPE:
        raise Exception("Requested unsupported ContentType in content_type: {}".format(content_type))

    # Load JSON input
    input_data = json.loads(serialized_input_data)
    image_loc_s3 = input_data['grid']

    # Fetch bucket and object details
    url_components = S3_URI_PATTERN.match(image_loc_s3)
    if url_components is None:
        raise ValueError(
            "Expected 'grid' to be an S3 URI of the form s3://bucket/key, got: {!r}".format(image_loc_s3))
    bucket_name, object_name = url_components.groups()

    # Download into a temporary file that is removed as soon as the block exits.
    s3_client = boto3.client('s3')
    with tempfile.NamedTemporaryFile() as file_handle:
        s3_client.download_fileobj(bucket_name, object_name, file_handle)
        file_handle.seek(0)
        # Image.open() is lazy; convert() reads the pixel data while the file still
        # exists. It also guarantees the 3 channels that Normalize() above expects.
        image = Image.open(file_handle).convert('RGB')

    # Transform image same as during training
    transformed_image = transform(image)
    return transformed_image.unsqueeze(0)


def output_fn(prediction_output, accept=JSON_CONTENT_TYPE):
    """
    Transforms the model output to return the text label instead of its index. Returns
    the result as JSON.

    Only the first row of ``prediction_output`` is used. Returns a tuple of
    ``(json_string, accept)`` where the JSON has the keys ``output`` (label)
    and ``confidence`` (softmax probability of that label).

    Raises:
        Exception: if ``accept`` is not ``application/json``.
        ValueError: if the predicted index has no entry in ``classes``.
    """
    logger.info('Serializing the generated output.')
    logger.info("Original output is {}".format(prediction_output))
    if accept != JSON_CONTENT_TYPE:
        raise Exception('Requested unsupported ContentType in Accept: ' + accept)

    # Normalize the confidence value to be a float value between 0 and 1
    probabilities = torch.nn.functional.softmax(prediction_output[0], dim=0)
    confidence, predicted = torch.max(probabilities, dim=0)
    predicted_index = predicted.item()

    # Fetch the text label based on the label index
    class_from_idx = next(
        (label for label, index in classes.items() if index == predicted_index), None)
    if class_from_idx is None:
        # Previously this surfaced as an unbound-variable error further down.
        raise ValueError(
            'No label found for predicted class index {}; was class_indices.json loaded?'.format(
                predicted_index))

    # Format and return the final result
    return json.dumps({'output': class_from_idx, 'confidence': confidence.item()}), accept


# --- Patch residue (kept as comments) -----------------------------------------
# diff --git a/reinvent-2019/sign-and-speak/scripts/grid_train.py b/reinvent-2019/sign-and-speak/scripts/grid_train.py
# new file mode 100644
# index 00000000..33164a32
# --- /dev/null
# +++ b/reinvent-2019/sign-and-speak/scripts/grid_train.py
# @@ -0,0 +1,195 @@
#
# Sign & Speak Training Script
#
# This script uses transfer learning on a ResNet18 model from the PyTorch model zoo
# to train an image classification model for a limited set of sign language words
# and phrases. It is compatible with Amazon SageMaker, which can be used to run
# training and hyperparameter tuning jobs from this script.
#
# The input consists of 3x3 grid images of video frames, organized into folders
# for each label (e.g., a folder named 'cat' contains all training images for
# the sign for cat).
# (end of the grid_train.py module header)
#
# The code is based on this tutorial:
# https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

import time
import copy
import logging
import sys
import argparse
import json
import os

import numpy as np
import torch
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import models


logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))


def save_model(model, model_dir):
    """
    Saves the entire PyTorch model (not just its state dict) as
    ``model.pth`` in the model artifact directory.
    """
    logger.info("Saving the model.")
    path = os.path.join(model_dir, 'model.pth')
    torch.save(model, path)


def save_classes(class_to_idx, output_dir):
    """
    Saves the dictionary of indices assigned to labels as
    ``class_indices.json`` in the output directory.
    """
    logger.info("Saving classes.")
    with open(os.path.join(output_dir, "class_indices.json"), "w") as file_handle:
        json.dump(class_to_idx, file_handle)


def _split_indices(dataset_size, validation_split, seed, shuffle=True):
    """
    Returns ``(train_indices, val_indices)`` for ``dataset_size`` samples.

    The first ``floor(validation_split * dataset_size)`` indices (after an
    optional shuffle seeded with ``seed``) form the validation set.
    """
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(indices)
    return indices[split:], indices[:split]


def train(args):
    """
    Splits the data set into training and validation sets, transforms the data,
    and runs the training epochs.

    ``args`` must provide: data_dir, model_dir, seed, batch_size, lr, momentum,
    step_size, gamma and epochs. The weights with the best validation accuracy
    are restored before the model and the label mapping are written to
    ``args.model_dir``.

    Raises:
        ValueError: if the data set is too small to give both a training and a
            validation sample.
    """

    # Define a data transformation similar to the one used to train the original ResNet model
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Load data from an image folder structure (one folder per label)
    dataset = ImageFolder(args.data_dir, transform=data_transform)
    class_names = dataset.classes

    # Shuffle and define split of data into training and validation sets
    train_indices, val_indices = _split_indices(len(dataset), validation_split=.1, seed=args.seed)
    if not train_indices or not val_indices:
        # Without this check the epoch statistics below divide by zero.
        raise ValueError(
            'Data set of {} images is too small for a 90/10 train/validation split'.format(len(dataset)))
    dataset_sizes = {'train': len(train_indices),
                     'val': len(val_indices)}
    samplers = {'train': SubsetRandomSampler(train_indices),
                'val': SubsetRandomSampler(val_indices)}

    dataloaders = {x: DataLoader(dataset, batch_size=args.batch_size,
                                 sampler=samplers[x])
                   for x in ['train', 'val']}

    # Load and set up pretrained ResNet model
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    logger.info("Number of classes is {}".format(len(class_names)))
    # Replace the final layer so the output size matches our label count
    model_ft.fc = nn.Linear(num_ftrs, len(class_names))
    model_ft = model_ft.to(device)
    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=args.lr, momentum=args.momentum)

    # Decay learning rate
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=args.step_size, gamma=args.gamma)

    since = time.time()
    best_model_wts = copy.deepcopy(model_ft.state_dict())
    best_acc = 0.0
    num_epochs = args.epochs

    for epoch in range(num_epochs):
        logger.info('Epoch {}/{}'.format(epoch, num_epochs - 1))
        logger.info('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model_ft.train()  # Set model to training mode
            else:
                model_ft.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer_ft.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model_ft(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer_ft.step()

                # statistics (loss.item() is a batch mean, so weight it by batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            if phase == 'train':
                exp_lr_scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            logger.info('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Track the best validation accuracy
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model_ft.state_dict())

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # '{:4f}' (minimum width 4) was a typo for four decimals.
    logger.info('Best val Acc: {:.4f}'.format(float(best_acc)))

    # Load best model weights and save this model
    model_ft.load_state_dict(best_model_wts)
    save_model(model_ft, args.model_dir)
    save_classes(dataset.class_to_idx, args.model_dir)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # Parameters specific to the deep learning model
    parser.add_argument('--batch-size', type=int, default=4, help='input batch size for training (default: 4)')
    parser.add_argument('--epochs', type=int, default=25, help='number of epochs to train (default: 25)')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate (default: 0.001)')
    parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum (default: 0.9)')
    parser.add_argument('--seed', type=int, default=42, help='random seed (default: 42)')
    parser.add_argument('--step-size', type=int, default=7, help='step size (default: 7)')
    parser.add_argument('--gamma', type=float, default=0.1, help='gamma (default: 0.1)')

    # Amazon SageMaker container environment variables.
    # os.environ[...] raised KeyError outside SageMaker even when the flag was given;
    # now the flag is simply required whenever the variable is missing.
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'),
                        required='SM_MODEL_DIR' not in os.environ,
                        help='path to the directory to write model artifacts to')
    parser.add_argument('--data-dir', type=str, default=os.environ.get('SM_CHANNEL_TRAINING'),
                        required='SM_CHANNEL_TRAINING' not in os.environ,
                        help='path to the directory containing the training data')

    train(parser.parse_args())

# --- Patch residue (kept as comments) -----------------------------------------
# \ No newline at end of file
# diff --git a/reinvent-2019/sign-and-speak/ui/sign.html b/reinvent-2019/sign-and-speak/ui/sign.html
# new file mode 100644
# index 00000000..b5a27881
# --- /dev/null
# +++ b/reinvent-2019/sign-and-speak/ui/sign.html
# @@ -0,0 +1,110 @@
# (sign.html markup was stripped during extraction; on this line only the page
#  title "Sign&Speak" survives)
+
+
+
+
+
+
Sign Panel
+
+
+ + +
+
+
+
+ + +
+
+ +
+
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
Message Panel
+
+
+ + +
+
+ + +
+ +
x + +
+
+
+
+
+
+
// --- Patch residue (kept as comments) -----------------------------------------
// (end of ui/sign.html: the markup was stripped during extraction, nothing is
//  recoverable on this line)
//
// diff --git a/reinvent-2019/sign-and-speak/ui/static/css/signspeak.css b/reinvent-2019/sign-and-speak/ui/static/css/signspeak.css
// new file mode 100644
// index 00000000..9d2f0d27
// --- /dev/null
// +++ b/reinvent-2019/sign-and-speak/ui/static/css/signspeak.css
// @@ -0,0 +1,13 @@
// .loader {
//     border: 16px solid #f3f3f3; /* Light grey */
//     border-top: 16px solid #3498db; /* Blue */
//     border-radius: 50%;
//     width: 120px;
//     height: 120px;
//     animation: spin 2s linear infinite;
// }
//
// @keyframes spin {
//     0% { transform: rotate(0deg); }
//     100% { transform: rotate(360deg); }
// }
// \ No newline at end of file
//
// diff --git a/reinvent-2019/sign-and-speak/ui/static/js/sign.js b/reinvent-2019/sign-and-speak/ui/static/js/sign.js
// new file mode 100644
// index 00000000..020ee1a2
// --- /dev/null
// +++ b/reinvent-2019/sign-and-speak/ui/static/js/sign.js
// @@ -0,0 +1,142 @@
// ------------------------------------------------------------------------------

///AWS Account Settings
// NOTE(review): these are long-lived IAM user keys shipped in a script that every
// visitor of the page can read. Acceptable for a demo only; for anything public,
// switch to short-lived credentials (e.g. an Amazon Cognito identity pool).
var app_credentials = { // TODO: Get the details from IAM User "s2sclientuser"
    accessKeyId: '',//TODO: Update the accesskeyid..
    secretAccessKey: '' //TODO: Update this secretAccessKey
};
var app_region = 'ap-southeast-2'; //TODO: Set the region as required


var s3Bucket = "signs-data-cf";
var s3Key = "02_app/upload/RecordedSign.webm";

AWS.config.update({credentials: app_credentials, region: app_region});
var s3 = new AWS.S3();

var video;
var recorder;

var videoConstraints = {
    video: true ,
    audio: false
};

// True once the camera was granted and `recorder` exists.
var initialized = false;
// Blob of the last finished recording; null until a recording was stopped.
var vidblob = null;

$(document).ready(function()
{
    document.getElementById("imgSignGrid").style.visibility ="hidden";
    video = document.getElementById('videoplayer');
    s3 = new AWS.S3();

    var startButton = document.getElementById('btn-start-recording');
    var stopButton = document.getElementById('btn-stop-recording');

    startButton.onclick = function() {
        startButton.disabled = true;
        showRecVideo();

        if (initialized)
        {
            // Camera and recorder already exist: just start a new take.
            recorder.startRecording();
            stopButton.disabled = false;
            return;
        }

        //Set Recorder for first time
        captureCamera(function(camera) {
            video.muted = true;
            video.volume = 0;
            video.srcObject = camera;
            recorder = RecordRTC(camera, {
                type: 'video'
            });
            recorder.startRecording();
            // keep a handle on the stream next to the recorder
            recorder.camera = camera;
            // Only flag success here. Setting it before the camera was granted made
            // the next click call startRecording() on an undefined recorder.
            initialized = true;
            stopButton.disabled = false;
        }, function() {
            // Capture failed: let the user try again instead of leaving Start disabled.
            startButton.disabled = false;
        });
    };

    stopButton.onclick = function() {
        stopButton.disabled = true;

        recorder.stopRecording(stopRecordingCallback);
        startButton.disabled = false;
    };
});

/**
 * Requests the camera and hands the resulting stream to `callback`.
 *
 * @param {function(MediaStream)} callback - called with the camera stream.
 * @param {function(*)=} onError - optional; called after the error was shown
 *     when the camera could not be captured or `callback` threw.
 */
function captureCamera(callback, onError) {
    navigator.mediaDevices.getUserMedia(videoConstraints).then(function(camera) {
        callback(camera);
    }).catch(function(error) {
        updateError('Unable to capture your camera. Please check console logs.');
        updateError(error);
        if (typeof onError === 'function') {
            onError(error);
        }
    });
}

/** Stores the finished recording so it can be sent later. */
function stopRecordingCallback() {
    vidblob = recorder.getBlob();
}

/**
 * Uploads a recorded video to the configured S3 bucket and shows the spinner
 * while the result is pending.
 *
 * @param {Blob} videoToUpload - the recording; nothing is sent when it is missing.
 * @param {string} key - S3 object key to write.
 */
function uploadVideoToS3(videoToUpload, key)
{
    console.info("upload video to S3");

    if (!videoToUpload)
    {
        // "Send" was pressed before any recording was stopped.
        updateError("No recorded sign to upload. Record a sign first.");
        return;
    }

    var spinner = document.getElementById("divSpinner");
    spinner.style.visibility="";

    try
    {
        var params = {
            Body: videoToUpload,
            Bucket: s3Bucket,
            Key: key,
            ServerSideEncryption: "AES256",
        };

        s3.putObject(params, function(err, data) {
            if (err)
            {
                // No result will arrive for a failed upload, so stop the spinner here.
                spinner.style.visibility = "hidden";
                updateError(err + err.stack); // an error occurred
            }
        });
    }
    catch(e)
    {
        spinner.style.visibility = "hidden";
        updateError("error encountered : " + e );
    }
}

$('#btn-send-sign').click(function () {
    uploadVideoToS3(vidblob,s3Key);
    document.getElementById("imgSignGrid").style.visibility ="";
});

/** Shows the live recording player and hides the secondary player. */
function showRecVideo()
{
    var vidplayer = document.getElementById("videoplayer");
    var secvidplayer = document.getElementById("sec-videoplayer");

    vidplayer.className = "";
    secvidplayer.className = "d-none";
}

/** Appends one <li> with the given class and text to the "messages" list. */
function appendSignMessage(className, text)
{
    var node = document.createElement("li");
    node.className = className;
    node.appendChild(document.createTextNode(text));
    document.getElementById("messages").appendChild(node);
}

/** Adds an informational entry to the "messages" list. */
function updateInfo(message)
{
    appendSignMessage("alert-info", "INFO : " + message);
}

/** Logs an error to the console and adds it to the "messages" list. */
function updateError(message)
{
    console.error(message);
    appendSignMessage("alert-warning", "ERROR : " + message);
}

// --- Patch residue (kept as comments) -----------------------------------------
// \ No newline at end of file
// diff --git a/reinvent-2019/sign-and-speak/ui/static/js/web-socket-msg.js b/reinvent-2019/sign-and-speak/ui/static/js/web-socket-msg.js
// new file mode 100644
// index 00000000..ded5992f
// --- /dev/null
// +++ b/reinvent-2019/sign-and-speak/ui/static/js/web-socket-msg.js
// @@ -0,0 +1,95 @@
// ------------------------------------------------------------------------------
var wsurl = "wss://abcdxyz.execute-api.ap-southeast-2.amazonaws.com/Prod"; //TODO: Put the CloudFormation Template Output value for
// S2SWebSocketURL   (last word of the TODO comment that starts on the `wsurl` line before this span)

$(document).ready(function()
{
    WebSocketConnect();

    // "Reset" clears the transcript field and the message panel.
    document.getElementById('btn-resetui').onclick = function() {
        $('#transcript').val('');
        // NOTE(review): `transcription` is not declared in this script; presumably it
        // is a global owned by another script on the page. Confirm before changing.
        transcription = '';
        document.getElementById("divtranscript").innerHTML = "";
    };
});

/**
 * Opens the WebSocket to `wsurl` and wires its events:
 * every incoming frame hides the spinner and is passed to updateMsgPanel().
 */
function WebSocketConnect() {
    if (!("WebSocket" in window)) {
        // The browser doesn't support WebSocket
        console.error("WS is not supported");
        alert("WebSocket NOT supported by your Browser!");
        return;
    }

    console.info("WS is supported by browser");

    // Let us open a web socket
    var ws = new WebSocket(wsurl);

    ws.onopen = function() {
        console.info("connection opened to WS");
        ws.send("Client is connected");
        console.info("Client is connected");
    };

    ws.onmessage = function (evt) {
        document.getElementById("divSpinner").style.visibility="hidden";
        var received_msg = evt.data;
        console.info('msg recv from socket : ' + received_msg);
        updateMsgPanel(received_msg);
    };

    ws.onerror = function (evt) {
        // Previously connection errors were only visible through the later close alert.
        console.error("WS error", evt);
    };

    ws.onclose = function() {
        // websocket is closed.
        console.info("connection closed to WS");
        alert("WS is closed. Refresh screen!!!");
    };
}

/**
 * Renders one received message in the "divtranscript" panel.
 *
 * Message format: isSign|confidence|Message
 * Example:        true|0.6358023881912231|Hello
 *
 * Frames without a message part (fewer than three fields) and non-string
 * frames are ignored. Everything after the second "|" belongs to the message,
 * so a "|" inside the text is kept.
 *
 * @param {string} messageData - raw frame received from the socket.
 */
function updateMsgPanel(messageData)
{
    if (typeof messageData !== "string")
    {
        return;
    }

    var msgSplit = messageData.split("|");
    if (msgSplit.length < 3)
    {
        return;
    }
    var sign = msgSplit[0];
    var conf = msgSplit[1];
    // Re-join the remainder: taking only msgSplit[2] cut the text at its first "|".
    var msg = msgSplit.slice(2).join("|");

    var divnode = document.createElement("div");
    var spannode = document.createElement("span");
    var textnode;
    if (sign === "true")
    {
        divnode.className = "alert alert-warning message";
        spannode.className = "badge badge-warning";
        spannode.innerText = "Auslan";
        textnode = document.createTextNode( " " + msg + " (Confidence: " + conf + ")");
    }
    else
    {
        divnode.className = "alert alert-dark message";
        spannode.className = "badge badge-dark";
        spannode.innerText = "English";
        textnode = document.createTextNode( " " + msg + " ( Amazon Transcribe )");
    }

    divnode.appendChild(spannode);
    divnode.appendChild(textnode);

    document.getElementById("divtranscript").appendChild(divnode);
}