From b292b6ddd4f74e09ed8a6fc8efa47880f87a45b2 Mon Sep 17 00:00:00 2001 From: heyqule Date: Thu, 22 Aug 2019 02:31:17 -0400 Subject: [PATCH 01/55] Add docker compose for kibana and elasticsearch --- .gitignore | 6 ++++++ docker-compose.yml | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 docker-compose.yml diff --git a/.gitignore b/.gitignore index c305487..fa3b3e9 100644 --- a/.gitignore +++ b/.gitignore @@ -97,6 +97,12 @@ ENV/ # mkdocs documentation /site +#Custom files +data/ +config.py +*_export.json +.git + # mypy .mypy_cache/ .DS_Store diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..9fb5941 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,29 @@ +# ./docker-compose.yml + +version: '3' + +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:5.6.16 + environment: + - cluster.name=docker-cluster + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + volumes: + - ./data:/usr/share/elasticsearch/data + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + nproc: + soft: 2048 + hard: 2048 + ports: + - "9200:9200" + kibana: + image: docker.elastic.co/kibana/kibana:5.6.16 + ports: + - "5601:5601" \ No newline at end of file From 8ee20b879bfd1612b2837e460ef65bd18204476e Mon Sep 17 00:00:00 2001 From: heyqule Date: Thu, 22 Aug 2019 23:24:49 -0400 Subject: [PATCH 02/55] Add support for multiple set of nltk tokens. 
Controls by --index --- .gitignore | 2 +- config.py.sample | 6 +- kibana_exports/amd_export.json | 114 ++++++++++++++++++++++++++++++++ kibana_exports/tsla_export.json | 114 ++++++++++++++++++++++++++++++++ sentiment.py | 29 +++++--- startup.sh | 6 ++ 6 files changed, 261 insertions(+), 10 deletions(-) create mode 100644 kibana_exports/amd_export.json create mode 100644 kibana_exports/tsla_export.json create mode 100755 startup.sh diff --git a/.gitignore b/.gitignore index fa3b3e9..b80a2f8 100644 --- a/.gitignore +++ b/.gitignore @@ -100,8 +100,8 @@ ENV/ #Custom files data/ config.py -*_export.json .git +.idea # mypy .mypy_cache/ diff --git a/config.py.sample b/config.py.sample index 43b3b61..6ec2a09 100644 --- a/config.py.sample +++ b/config.py.sample @@ -6,7 +6,11 @@ consumer_key = "" consumer_secret = "" access_token = "" access_token_secret = "" -nltk_tokens_required = ("Tesla", "@Tesla", "#Tesla", "tesla", "TSLA", "tsla", "#TSLA", "#tsla", "elonmusk", "Elon", "Musk") +nltk_tokens_required = { + 'default': ("Tesla", "@Tesla", "#Tesla", "tesla", "TSLA", "tsla", "#TSLA", "#tsla", "elonmusk", "Elon", "Musk"), + 'tsla': ("Tesla", "@Tesla", "#Tesla", "tesla", "TSLA", "tsla", "#TSLA", "#tsla", "elonmusk", "Elon", "Musk"), + 'amd': ('amd','ryzen','epyc','radeon','server','data','center','crossfire','threadripper') +} nltk_tokens_ignored = ("win", "Win", "giveaway", "Giveaway") twitter_feeds = ["@elonmusk", "@cnbc", "@benzinga", "@stockwits", "@Newsweek", "@WashingtonPost", "@breakoutstocks", "@bespokeinvest", diff --git a/kibana_exports/amd_export.json b/kibana_exports/amd_export.json new file mode 100644 index 0000000..a1ce780 --- /dev/null +++ b/kibana_exports/amd_export.json @@ -0,0 +1,114 @@ +[ + { + "_id": "amd-stock-dashboard", + "_type": "dashboard", + "_source": { + "title": "amd_dashboard", + "hits": 0, + "description": "", + "panelsJSON": 
"[{\"col\":1,\"id\":\"AWZO7a1n0rkQl37xrvXK\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"AWZO8wUR0rkQl37xrvXV\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"AWZO_6iv0rkQl37xrvXt\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"AWZW6DNS0rkQl37xrvcg\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"AWZYOrcih4RzKn4w3M7J\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"AWZY6Xtjh4RzKn4w3NXT\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", + "optionsJSON": "{\"darkTheme\":true}", + "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", + "version": 1, + "timeRestore": false, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}" + } + } + }, + { + "_id": "amd-stock-search", + "_type": "search", + "_source": { + "title": "amd_savesearch", + "description": "", + "hits": 0, + "columns": [ + "author", + "location", + "message", + "polarity", + "subjectivity", + "sentiment" + ], + "sort": [ + "date", + "desc" + ], + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"amd\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"amd\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "amd-stock-visualization-1", + "_type": 
"visualization", + "_source": { + "title": "amd_polarity", + "visState": "{\"title\":\"amd_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", + "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "amd-stock-visualization-2", + "_type": "visualization", + "_source": { + "title": "amd_sentinel", + "visState": 
"{\"title\":\"amd_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "amd-stock-visualization-3", + "_type": "visualization", + "_source": { + "title": "amd_stockprice", + "visState": "{\"title\":\"amd_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Sum of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Sum of price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Sum of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Sum of 
price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"amd\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "amd-stock-visualization-4", + "_type": "visualization", + "_source": { + "title": "amd_tweets", + "visState": "{\"title\":\"amd_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 
seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "amd-stock-visualization-5", + "_type": "visualization", + "_source": { + "title": "amd_wordcloud", + "visState": "{\n \"title\": \"amd_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n \"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + 
"searchSourceJSON": "{\n \"index\": \"amd\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" + } + } + } +] \ No newline at end of file diff --git a/kibana_exports/tsla_export.json b/kibana_exports/tsla_export.json new file mode 100644 index 0000000..cc4ab6e --- /dev/null +++ b/kibana_exports/tsla_export.json @@ -0,0 +1,114 @@ +[ + { + "_id": "tsla-stock-dashboard", + "_type": "dashboard", + "_source": { + "title": "tsla_dashboard", + "hits": 0, + "description": "", + "panelsJSON": "[{\"col\":1,\"id\":\"AWZO7a1n0rkQl37xrvXK\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"AWZO8wUR0rkQl37xrvXV\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"AWZO_6iv0rkQl37xrvXt\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"AWZW6DNS0rkQl37xrvcg\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"AWZYOrcih4RzKn4w3M7J\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"AWZY6Xtjh4RzKn4w3NXT\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", + "optionsJSON": "{\"darkTheme\":true}", + "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", + "version": 1, + "timeRestore": false, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}" + } + } + }, + { + "_id": "tsla-stock-search", + "_type": "search", + "_source": { + "title": "tsla_savesearch", + "description": "", + "hits": 0, + "columns": [ + "author", + "location", + "message", + "polarity", + "subjectivity", + "sentiment" + ], + "sort": [ + "date", + 
"desc" + ], + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"tsla\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tsla\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "tsla-stock-visualization-1", + "_type": "visualization", + "_source": { + "title": "tsla_polarity", + "visState": "{\"title\":\"tsla_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", + "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + 
"searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "tsla-stock-visualization-2", + "_type": "visualization", + "_source": { + "title": "tsla_sentinel", + "visState": "{\"title\":\"tsla_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "tsla-stock-visualization-3", + "_type": "visualization", + "_source": { + "title": "tsla_stockprice", + "visState": "{\"title\":\"tsla_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Sum of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Sum of 
price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Sum of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Sum of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tsla\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "tsla-stock-visualization-4", + "_type": "visualization", + "_source": { + "title": "tsla_tweets", + "visState": 
"{\"title\":\"tsla_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "tsla-stock-visualization-5", + "_type": "visualization", + "_source": { + "title": "tsla_wordcloud", + "visState": "{\n \"title\": \"tsla_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n 
\"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\n \"index\": \"tsla\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" + } + } + } +] \ No newline at end of file diff --git a/sentiment.py b/sentiment.py index b569a9c..2aef4d7 100644 --- a/sentiment.py +++ b/sentiment.py @@ -199,7 +199,7 @@ def on_timeout(self): class NewsHeadlineListener: - def __init__(self, url=None, frequency=120): + def __init__(self, url=None, frequency=3600): self.url = url self.headlines = [] self.followedlinks = [] @@ -223,7 +223,7 @@ def __init__(self, url=None, frequency=120): # create tokens of words in text using nltk text_for_tokens = re.sub( r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", htext) - tokens = nltk.word_tokenize(text_for_tokens) + tokens = nltk.word_tokenize(text_for_tokens.lower()) print("NLTK Tokens: " + str(tokens)) # check ignored tokens from config @@ -233,7 +233,14 @@ def __init__(self, url=None, frequency=120): continue # check required tokens from config tokenspass = False - for t in nltk_tokens_required: + + + if args.index in nltk_tokens_required: + nltk_tokens = nltk_tokens_required[args.index] + else: + nltk_tokens = nltk_tokens_required['default'] + + for t in nltk_tokens: if t in tokens: tokenspass = True break @@ -486,8 +493,8 @@ def get_twitter_users_from_file(file): help="Use twitter user ids from file") parser.add_argument("-n", "--newsheadlines", metavar="SYMBOL", help="Get news headlines instead of Twitter using stock symbol, example: TSLA") - parser.add_argument("--frequency", metavar="FREQUENCY", default=120, type=int, - help="How often in seconds to retrieve news headlines (default: 120 sec)") + 
parser.add_argument("--frequency", metavar="FREQUENCY", default=3600, type=int, + help="How often in seconds to retrieve news headlines (default: 3600 sec)") parser.add_argument("--followlinks", action="store_true", help="Follow links on news headlines and scrape relevant text from landing page") parser.add_argument("-v", "--verbose", action="store_true", @@ -785,14 +792,20 @@ def get_twitter_users_from_file(file): logger.info('Twitter keywords: ' + str(args.keywords)) logger.info('Listening for Tweets (ctrl-c to exit)...') if args.keywords is None: - stream.filter(follow=useridlist, languages=['en']) + stream.filter(follow= str(useridlist), languages=['en']) else: # keywords to search on twitter # add keywords to list keywords = args.keywords.split(',') + + if args.index in nltk_tokens_required: + nltk_tokens = nltk_tokens_required[args.index] + else: + nltk_tokens = nltk_tokens_required['default'] + # add tokens to keywords to list - for f in nltk_tokens_required: - keywords.append(f) + for f in nltk_tokens: + keywords.append(f.lower()) stream.filter(track=keywords, languages=['en']) except TweepError as te: logger.debug("Tweepy Exception: Failed to get tweets caused by: %s" % te) diff --git a/startup.sh b/startup.sh new file mode 100755 index 0000000..d90f57f --- /dev/null +++ b/startup.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +python sentiment.py -n TSLA --followlinks -i tsla & +python stockprice.py -s TSLA -i tsla & +python sentiment.py -n AMD --followlinks -i amd & +python stockprice.py -s AMD -i amd & \ No newline at end of file From 9dca23440fdadaec5fea42878514fc9d7e40509d Mon Sep 17 00:00:00 2001 From: heyqule Date: Fri, 23 Aug 2019 00:02:48 -0400 Subject: [PATCH 03/55] Frequency adjustment Dashboard fix --- kibana_exports/amd_export.json | 2 +- kibana_exports/tsla_export.json | 2 +- stockprice.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kibana_exports/amd_export.json b/kibana_exports/amd_export.json index a1ce780..755822b 100644 ---
a/kibana_exports/amd_export.json +++ b/kibana_exports/amd_export.json @@ -6,7 +6,7 @@ "title": "amd_dashboard", "hits": 0, "description": "", - "panelsJSON": "[{\"col\":1,\"id\":\"AWZO7a1n0rkQl37xrvXK\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"AWZO8wUR0rkQl37xrvXV\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"AWZO_6iv0rkQl37xrvXt\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"AWZW6DNS0rkQl37xrvcg\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"AWZYOrcih4RzKn4w3M7J\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"AWZY6Xtjh4RzKn4w3NXT\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", + "panelsJSON": "[{\"col\":1,\"id\":\"amd-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"amd-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"amd-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"amd-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"amd-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"amd-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", "optionsJSON": "{\"darkTheme\":true}", "uiStateJSON": 
"{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", "version": 1, diff --git a/kibana_exports/tsla_export.json b/kibana_exports/tsla_export.json index cc4ab6e..c0c058e 100644 --- a/kibana_exports/tsla_export.json +++ b/kibana_exports/tsla_export.json @@ -6,7 +6,7 @@ "title": "tsla_dashboard", "hits": 0, "description": "", - "panelsJSON": "[{\"col\":1,\"id\":\"AWZO7a1n0rkQl37xrvXK\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"AWZO8wUR0rkQl37xrvXV\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"AWZO_6iv0rkQl37xrvXt\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"AWZW6DNS0rkQl37xrvcg\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"AWZYOrcih4RzKn4w3M7J\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"AWZY6Xtjh4RzKn4w3NXT\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", + "panelsJSON": 
"[{\"col\":1,\"id\":\"tsla-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"tsla-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"tsla-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"tsla-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"tsla-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"tsla-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", "optionsJSON": "{\"darkTheme\":true}", "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", "version": 1, diff --git a/stockprice.py b/stockprice.py index f671398..217f4e4 100644 --- a/stockprice.py +++ b/stockprice.py @@ -123,7 +123,7 @@ def get_price(self, url, symbol): help="Delete existing Elasticsearch index first") parser.add_argument("-s", "--symbol", metavar="SYMBOL", help="Stock symbol to use, example: TSLA") - parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=120, type=int, + parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=600, type=int, help="How often in seconds to retrieve stock data (default: 120 sec)") parser.add_argument("-v", "--verbose", action="store_true", help="Increase output verbosity") From 74e6b360dfa71062cd58ebb615d3cd29e3fd99d8 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 25 Aug 2019 16:11:56 -0400 Subject: [PATCH 04/55] Fully automate the build with docker --- .gitignore | 1 + Python.Dockerfile | 11 +++++++++++ docker-compose.yml | 14 
+++++++++++--- kibana_exports/amd_export.json | 2 +- kibana_exports/tsla_export.json | 2 +- config.py.sample => src/config.py.sample | 2 +- sentiment.py => src/sentiment.py | 13 +++++++------ startup.sh => src/startup.sh | 12 +++++++++++- stockprice.py => src/stockprice.py | 7 ++++--- twitteruserids.txt => src/twitteruserids.txt | 0 10 files changed, 48 insertions(+), 16 deletions(-) create mode 100644 Python.Dockerfile rename config.py.sample => src/config.py.sample (97%) rename sentiment.py => src/sentiment.py (99%) rename startup.sh => src/startup.sh (59%) rename stockprice.py => src/stockprice.py (99%) rename twitteruserids.txt => src/twitteruserids.txt (100%) diff --git a/.gitignore b/.gitignore index b80a2f8..67ebd3d 100644 --- a/.gitignore +++ b/.gitignore @@ -102,6 +102,7 @@ data/ config.py .git .idea +twitteruserids.txt # mypy .mypy_cache/ diff --git a/Python.Dockerfile b/Python.Dockerfile new file mode 100644 index 0000000..401be32 --- /dev/null +++ b/Python.Dockerfile @@ -0,0 +1,11 @@ +FROM python:3 + +WORKDIR /usr/src/app + +ADD requirements.txt ./requirements.txt + +RUN pip install --no-cache-dir -r requirements.txt + +RUN [ "python", "-c", "import nltk; nltk.download('punkt')" ] + +ENTRYPOINT ["sh","startup.sh"] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 9fb5941..cec6296 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,9 +21,17 @@ services: nproc: soft: 2048 hard: 2048 - ports: - - "9200:9200" kibana: image: docker.elastic.co/kibana/kibana:5.6.16 + depends_on: + - elasticsearch ports: - - "5601:5601" \ No newline at end of file + - "5601:5601" + python3: + build: + context: ./ + dockerfile: Python.Dockerfile + depends_on: + - elasticsearch + volumes: + - ./src:/usr/src/app diff --git a/kibana_exports/amd_export.json b/kibana_exports/amd_export.json index 755822b..c744bf8 100644 --- a/kibana_exports/amd_export.json +++ b/kibana_exports/amd_export.json @@ -74,7 +74,7 @@ "_type": "visualization", 
"_source": { "title": "amd_stockprice", - "visState": "{\"title\":\"amd_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Sum of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Sum of price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Sum of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Sum of 
price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", + "visState": "{\"title\":\"amd_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of 
price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", "uiStateJSON": "{}", "description": "", "version": 1, diff --git a/kibana_exports/tsla_export.json b/kibana_exports/tsla_export.json index c0c058e..206a0aa 100644 --- a/kibana_exports/tsla_export.json +++ b/kibana_exports/tsla_export.json @@ -74,7 +74,7 @@ "_type": "visualization", "_source": { "title": "tsla_stockprice", - "visState": "{\"title\":\"tsla_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Sum of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Sum of 
price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Sum of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Sum of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", + "visState": "{\"title\":\"tsla_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of 
price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", "uiStateJSON": "{}", "description": "", "version": 1, diff --git a/config.py.sample b/src/config.py.sample similarity index 97% rename from config.py.sample rename to src/config.py.sample index 6ec2a09..a4ab396 100644 --- a/config.py.sample +++ b/src/config.py.sample @@ -1,4 +1,4 @@ -elasticsearch_host = "localhost" +elasticsearch_host = "elasticsearch" elasticsearch_port = 9200 elasticsearch_user = "" elasticsearch_password = "" diff --git a/sentiment.py b/src/sentiment.py similarity index 99% rename from sentiment.py rename to src/sentiment.py index 2aef4d7..eab03d4 100644 --- a/sentiment.py +++ b/src/sentiment.py @@ -11,15 +11,16 @@ LICENSE for the full license text. 
""" -import sys +import argparse import json -import time +import logging import re -import unicodedata -import requests +import sys +import time + import nltk -import argparse -import logging +import requests + try: import urllib.parse as urlparse except ImportError: diff --git a/startup.sh b/src/startup.sh similarity index 59% rename from startup.sh rename to src/startup.sh index d90f57f..d558b10 100755 --- a/startup.sh +++ b/src/startup.sh @@ -1,6 +1,16 @@ #!/bin/bash +sleep 15 python sentiment.py -n TSLA --followlinks -i tsla & +sleep 1 python stockprice.py -s TSLA -i tsla & +sleep 1 python sentiment.py -n AMD --followlinks -i amd & -python stockprice.py -s AMD -i amd & \ No newline at end of file +sleep 1 +python stockprice.py -s AMD -i amd & + + +while true +do + sleep 60 +done diff --git a/stockprice.py b/src/stockprice.py similarity index 99% rename from stockprice.py rename to src/stockprice.py index 217f4e4..d16cd48 100644 --- a/stockprice.py +++ b/src/stockprice.py @@ -10,12 +10,13 @@ LICENSE for the full license text. """ -import time -import requests -import re import argparse import logging import sys +import time + +import requests + try: from elasticsearch5 import Elasticsearch except ImportError: diff --git a/twitteruserids.txt b/src/twitteruserids.txt similarity index 100% rename from twitteruserids.txt rename to src/twitteruserids.txt From 53408c918642f616f88140cfe715eb4fadab9645 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 25 Aug 2019 23:30:50 -0400 Subject: [PATCH 05/55] Add support to bypass fetching stock price outside of regular hours. 
Doc change --- README.md | 7 ++++ docker-compose.yml | 3 ++ requirements.txt | 1 + src/{config.py.sample => config.sample.py} | 21 +++++++++--- src/sentiment.py | 24 ++++++++++++-- src/startup.sh | 2 +- src/stockprice.py | 38 ++++++++++++++++++---- 7 files changed, 81 insertions(+), 15 deletions(-) rename src/{config.py.sample => config.sample.py} (65%) diff --git a/README.md b/README.md index e9f731e..1b9ab89 100644 --- a/README.md +++ b/README.md @@ -139,3 +139,10 @@ optional arguments: -q, --quiet Run quiet with no message output -V, --version Prints version and exits ``` + +### HOWTO DOCKER +- Change config.py +- Change startup.sh to include your tickers +- run docker-compose up +- ??? +- Profit \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index cec6296..35340bf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,6 +21,9 @@ services: nproc: soft: 2048 hard: 2048 + #expose this for local dev only! + #ports: + # - "9200:9200" kibana: image: docker.elastic.co/kibana/kibana:5.6.16 depends_on: diff --git a/requirements.txt b/requirements.txt index 45f514e..a625eed 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ tweepy beautifulsoup4 textblob vaderSentiment +pytz \ No newline at end of file diff --git a/src/config.py.sample b/src/config.sample.py similarity index 65% rename from src/config.py.sample rename to src/config.sample.py index a4ab396..6da12d2 100644 --- a/src/config.py.sample +++ b/src/config.sample.py @@ -1,15 +1,18 @@ +#Global Config elasticsearch_host = "elasticsearch" elasticsearch_port = 9200 elasticsearch_user = "" elasticsearch_password = "" + +#Sentiment Analyizers config consumer_key = "" consumer_secret = "" access_token = "" access_token_secret = "" nltk_tokens_required = { - 'default': ("Tesla", "@Tesla", "#Tesla", "tesla", "TSLA", "tsla", "#TSLA", "#tsla", "elonmusk", "Elon", "Musk"), - 'tsla': ("Tesla", "@Tesla", "#Tesla", "tesla", "TSLA", "tsla", "#TSLA", "#tsla", 
"elonmusk", "Elon", "Musk"), - 'amd': ('amd','ryzen','epyc','radeon','server','data','center','crossfire','threadripper') + 'default': ("increase","decrease","buying","sold","buy","selling","winning","losing"), + 'tsla': ("tesla", "@tesla", "#tesla", "tsla", "#tsla", "elonmusk", "elon", "musk"), + 'amd': ('amd','ryzen','epyc','radeon','crossfire','threadripper') } nltk_tokens_ignored = ("win", "Win", "giveaway", "Giveaway") twitter_feeds = ["@elonmusk", "@cnbc", "@benzinga", "@stockwits", @@ -19,5 +22,13 @@ "@Carl_C_Icahn", "@ReformedBroker", "@bespokeinvest", "@stlouisfed", "@muddywatersre", "@mcuban", "@AswathDamodaran", "@elerianm", "@MorganStanley", "@ianbremmer", "@GoldmanSachs", "@Wu_Tang_Finance", - "@Schuldensuehner", "@NorthmanTrader", "@Frances_Coppola", "@bySamRo", - "@BuzzFeed","@nytimes"] + "@Schuldensuehner", "@NorthmanTrader", "@Frances_Coppola", "@BuzzFeed","@nytimes"] +sentiment_frequency = 3600 + +#Stock Price fetcher config +price_frequency = 900 +weekday_start = 1 +weekday_end = 5 +hour_start = 9 +hour_end = 18 +timezone_str = 'America/Toronto' \ No newline at end of file diff --git a/src/sentiment.py b/src/sentiment.py index eab03d4..570f7ba 100644 --- a/src/sentiment.py +++ b/src/sentiment.py @@ -200,15 +200,18 @@ def on_timeout(self): class NewsHeadlineListener: - def __init__(self, url=None, frequency=3600): + def __init__(self, url=None, frequency=sentiment_frequency): self.url = url self.headlines = [] self.followedlinks = [] self.frequency = frequency + self.max_cache = 1000; while True: new_headlines = self.get_news_headlines(self.url) + self.cleanup() + # add any new headlines for htext, htext_url in new_headlines: if htext not in self.headlines: @@ -265,6 +268,21 @@ def __init__(self, url=None, frequency=3600): logger.info("Will get news headlines again in %s sec..." 
% self.frequency) time.sleep(self.frequency) + def cleanup(self): + new_headline = [] + new_followlink = [] + if len(self.headlines) > self.max_cache: + for i in range(self.max_cache / 2, len(self.headlines) - 1): + new_headline.append(self.headlines[i]) + + self.headlines = new_headline + + if len(self.followedlinks) > self.max_cache: + for i in range(self.max_cache / 2, len(self.followedlinks) - 1): + new_followlink.append(self.followedlinks[i]) + + self.followedlinks = new_followlink + def get_news_headlines(self, url): @@ -494,8 +512,8 @@ def get_twitter_users_from_file(file): help="Use twitter user ids from file") parser.add_argument("-n", "--newsheadlines", metavar="SYMBOL", help="Get news headlines instead of Twitter using stock symbol, example: TSLA") - parser.add_argument("--frequency", metavar="FREQUENCY", default=3600, type=int, - help="How often in seconds to retrieve news headlines (default: 3600 sec)") + parser.add_argument("--frequency", metavar="FREQUENCY", default=sentiment_frequency, type=int, + help="How often in seconds to retrieve news headlines (default: %d sec)" % sentiment_frequency) parser.add_argument("--followlinks", action="store_true", help="Follow links on news headlines and scrape relevant text from landing page") parser.add_argument("-v", "--verbose", action="store_true", diff --git a/src/startup.sh b/src/startup.sh index d558b10..84d8219 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,6 +1,6 @@ #!/bin/bash -sleep 15 +sleep 30 python sentiment.py -n TSLA --followlinks -i tsla & sleep 1 python stockprice.py -s TSLA -i tsla & diff --git a/src/stockprice.py b/src/stockprice.py index d16cd48..d30309f 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -14,8 +14,10 @@ import logging import sys import time - +import datetime +import re import requests +from pytz import timezone try: from elasticsearch5 import Elasticsearch @@ -24,7 +26,8 @@ from random import randint # import elasticsearch host -from config import 
elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password +from config import elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password, \ + price_frequency, weekday_start, weekday_end, hour_start, hour_end, timezone_str STOCKSIGHT_VERSION = '0.1-b.5' @@ -37,19 +40,31 @@ es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], http_auth=(elasticsearch_user, elasticsearch_password)) +regex = re + class GetStock: def get_price(self, url, symbol): - import re + + eastern_timezone = timezone(timezone_str) while True: + if self.isNotLive(eastern_timezone): + #logger.info("Stock market is not live. Current time: %s" % datetime.datetime.now(timezone).strftime("%Y-%m-%d %H:%M")) + today = datetime.datetime.now(eastern_timezone) + logger.info("Stock market is not live. Current time: %s" % today.strftime('%H')) + logger.info("Will get stock data again in %s sec..." % args.frequency) + time.sleep(args.frequency) + continue + + logger.info("Grabbing stock data for symbol %s..." % symbol) try: # add stock symbol to url - url = re.sub("SYMBOL", symbol, url) + url = regex.sub("SYMBOL", symbol, url) # get stock data (json) from url try: r = requests.get(url) @@ -113,6 +128,17 @@ def get_price(self, url, symbol): logger.info("Will get stock data again in %s sec..." 
% args.frequency) time.sleep(args.frequency) + def isNotLive(self, timezone): + today = datetime.datetime.now(timezone); + if today.weekday() >= weekday_start and \ + today.weekday() <= weekday_end and \ + today.hour() >= hour_start and \ + today.hour() <= hour_end: + return False; + + return True; + + if __name__ == '__main__': @@ -124,8 +150,8 @@ def get_price(self, url, symbol): help="Delete existing Elasticsearch index first") parser.add_argument("-s", "--symbol", metavar="SYMBOL", help="Stock symbol to use, example: TSLA") - parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=600, type=int, - help="How often in seconds to retrieve stock data (default: 120 sec)") + parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=price_frequency, type=int, + help="How often in seconds to retrieve stock data (default: %d sec)" % price_frequency) parser.add_argument("-v", "--verbose", action="store_true", help="Increase output verbosity") parser.add_argument("--debug", action="store_true", From d5393e7e492b1abccf6fd80d164e8490da40eaed Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 25 Aug 2019 23:45:47 -0400 Subject: [PATCH 06/55] Fix time display --- src/stockprice.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/stockprice.py b/src/stockprice.py index d30309f..596e68d 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -51,9 +51,8 @@ def get_price(self, url, symbol): while True: if self.isNotLive(eastern_timezone): - #logger.info("Stock market is not live. Current time: %s" % datetime.datetime.now(timezone).strftime("%Y-%m-%d %H:%M")) today = datetime.datetime.now(eastern_timezone) - logger.info("Stock market is not live. Current time: %s" % today.strftime('%H')) + logger.info("Stock market is not live. Current time: %s" % today.strftime("%Y-%m-%d %H:%M")) logger.info("Will get stock data again in %s sec..." 
% args.frequency) time.sleep(args.frequency) continue From 4dc0238711234c440f0d351dddf32d504ed217af Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 26 Aug 2019 09:40:01 -0400 Subject: [PATCH 07/55] Optimization --- Python.Dockerfile | 2 +- src/sentiment.py | 11 +++++++---- src/startup.sh | 6 +++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Python.Dockerfile b/Python.Dockerfile index 401be32..6341667 100644 --- a/Python.Dockerfile +++ b/Python.Dockerfile @@ -1,4 +1,4 @@ -FROM python:3 +FROM python:3-alpine WORKDIR /usr/src/app diff --git a/src/sentiment.py b/src/sentiment.py index 570f7ba..03a9fd5 100644 --- a/src/sentiment.py +++ b/src/sentiment.py @@ -210,8 +210,6 @@ def __init__(self, url=None, frequency=sentiment_frequency): while True: new_headlines = self.get_news_headlines(self.url) - self.cleanup() - # add any new headlines for htext, htext_url in new_headlines: if htext not in self.headlines: @@ -266,8 +264,12 @@ def __init__(self, url=None, frequency=sentiment_frequency): "subjectivity": subjectivity, "sentiment": sentiment}) + new_headlines = None; + self.cleanup() + logger.info("Will get news headlines again in %s sec..." 
% self.frequency) time.sleep(self.frequency) + def cleanup(self): new_headline = [] new_followlink = [] @@ -303,7 +305,7 @@ def get_news_headlines(self, url): if html: for i in html: - latestheadlines.append((i.next.next.next.next, url)) + latestheadlines.append((unicode(i.next.next.next.next), url)) logger.debug(latestheadlines) if args.followlinks: @@ -320,7 +322,7 @@ def get_news_headlines(self, url): for linkurl in latestheadlines_links: for p in get_page_text(linkurl): - latestheadlines.append((p, linkurl)) + latestheadlines.append((unicode(p), linkurl)) logger.debug(latestheadlines) except requests.exceptions.RequestException as re: @@ -463,6 +465,7 @@ def get_twitter_users_from_url(url): html_links = [] for link in soup.findAll('a'): html_links.append(link.get('href')) + if html_links: for link in html_links: # check if twitter_url in link diff --git a/src/startup.sh b/src/startup.sh index 84d8219..4adc01b 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,16 +1,16 @@ #!/bin/bash sleep 30 -python sentiment.py -n TSLA --followlinks -i tsla & +python sentiment.py -n TSLA -i tsla & sleep 1 python stockprice.py -s TSLA -i tsla & sleep 1 -python sentiment.py -n AMD --followlinks -i amd & +python sentiment.py -n AMD -i amd & sleep 1 python stockprice.py -s AMD -i amd & while true do - sleep 60 + sleep 3600 done From b7cda287d826cf0d08db02e9c03d07ab771403a6 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 26 Aug 2019 10:03:21 -0400 Subject: [PATCH 08/55] Fix hour() error --- src/stockprice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stockprice.py b/src/stockprice.py index 596e68d..dba072d 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -131,8 +131,8 @@ def isNotLive(self, timezone): today = datetime.datetime.now(timezone); if today.weekday() >= weekday_start and \ today.weekday() <= weekday_end and \ - today.hour() >= hour_start and \ - today.hour() <= hour_end: + today.hour >= hour_start and \ + today.hour <= 
hour_end: return False; return True; From a8bf22ac06c7469522d5842c8b31d6eaf67cca82 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 27 Aug 2019 17:43:00 -0400 Subject: [PATCH 09/55] Fix Cache Cleaning issue --- src/sentiment.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sentiment.py b/src/sentiment.py index 03a9fd5..482e959 100644 --- a/src/sentiment.py +++ b/src/sentiment.py @@ -273,17 +273,16 @@ def __init__(self, url=None, frequency=sentiment_frequency): def cleanup(self): new_headline = [] new_followlink = [] + if len(self.headlines) > self.max_cache: for i in range(self.max_cache / 2, len(self.headlines) - 1): new_headline.append(self.headlines[i]) - - self.headlines = new_headline + self.headlines = new_headline if len(self.followedlinks) > self.max_cache: for i in range(self.max_cache / 2, len(self.followedlinks) - 1): new_followlink.append(self.followedlinks[i]) - - self.followedlinks = new_followlink + self.followedlinks = new_followlink def get_news_headlines(self, url): From aea6a592979d4a3dbad96ebab8cbc71e03522a3e Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 27 Aug 2019 18:32:16 -0400 Subject: [PATCH 10/55] Change startup.sh to startup.sample.sh --- .gitignore | 1 + src/startup.sample.sh | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100755 src/startup.sample.sh diff --git a/.gitignore b/.gitignore index 67ebd3d..7e104d1 100644 --- a/.gitignore +++ b/.gitignore @@ -100,6 +100,7 @@ ENV/ #Custom files data/ config.py +startup.sh .git .idea twitteruserids.txt diff --git a/src/startup.sample.sh b/src/startup.sample.sh new file mode 100755 index 0000000..4adc01b --- /dev/null +++ b/src/startup.sample.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +sleep 30 +python sentiment.py -n TSLA -i tsla & +sleep 1 +python stockprice.py -s TSLA -i tsla & +sleep 1 +python sentiment.py -n AMD -i amd & +sleep 1 +python stockprice.py -s AMD -i amd & + + +while true +do + sleep 3600 +done From 
64ddc352e9d27eddf2127fd424525283477e2887 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 27 Aug 2019 18:36:29 -0400 Subject: [PATCH 11/55] Add Curl to python instance for cleaning purposes. --- Python.Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python.Dockerfile b/Python.Dockerfile index 6341667..80569e0 100644 --- a/Python.Dockerfile +++ b/Python.Dockerfile @@ -4,6 +4,8 @@ WORKDIR /usr/src/app ADD requirements.txt ./requirements.txt +RUN apk --no-cache add curl + RUN pip install --no-cache-dir -r requirements.txt RUN [ "python", "-c", "import nltk; nltk.download('punkt')" ] From 004c17c9d525fbc5301df268f2458df527756ad3 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 27 Aug 2019 20:41:07 -0400 Subject: [PATCH 12/55] Clean cache --- .gitignore | 1 + export.json | 114 -------------------------------- kibana_exports/amd_export.json | 114 -------------------------------- kibana_exports/export.json | 114 ++++++++++++++++++++++++++++++++ kibana_exports/tsla_export.json | 114 -------------------------------- 5 files changed, 115 insertions(+), 342 deletions(-) delete mode 100644 export.json delete mode 100644 kibana_exports/amd_export.json create mode 100644 kibana_exports/export.json delete mode 100644 kibana_exports/tsla_export.json diff --git a/.gitignore b/.gitignore index 7e104d1..9c3558a 100644 --- a/.gitignore +++ b/.gitignore @@ -104,6 +104,7 @@ startup.sh .git .idea twitteruserids.txt +*_export.json # mypy .mypy_cache/ diff --git a/export.json b/export.json deleted file mode 100644 index 3654a0c..0000000 --- a/export.json +++ /dev/null @@ -1,114 +0,0 @@ -[ - { - "_id": "AWZPAfg50rkQl37xrvXw", - "_type": "dashboard", - "_source": { - "title": "stocksight_dashboard", - "hits": 0, - "description": "", - "panelsJSON": 
"[{\"col\":1,\"id\":\"AWZO7a1n0rkQl37xrvXK\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"AWZO8wUR0rkQl37xrvXV\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"AWZO_6iv0rkQl37xrvXt\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"AWZW6DNS0rkQl37xrvcg\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"AWZYOrcih4RzKn4w3M7J\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"AWZY6Xtjh4RzKn4w3NXT\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", - "optionsJSON": "{\"darkTheme\":true}", - "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", - "version": 1, - "timeRestore": false, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}" - } - } - }, - { - "_id": "AWZO_6iv0rkQl37xrvXt", - "_type": "search", - "_source": { - "title": "stocksight_savesearch", - "description": "", - "hits": 0, - "columns": [ - "author", - "location", - "message", - "polarity", - "subjectivity", - "sentiment" - ], - "sort": [ - "date", - "desc" - ], - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"stocksight\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"stocksight\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": 
"AWZY6Xtjh4RzKn4w3NXT", - "_type": "visualization", - "_source": { - "title": "stocksight_polarity", - "visState": "{\"title\":\"stocksight_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", - "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"stocksight\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "AWZO7a1n0rkQl37xrvXK", - "_type": "visualization", - "_source": { - "title": "stocksight_sentinel", - "visState": 
"{\"title\":\"stocksight_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"stocksight\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "AWZYOrcih4RzKn4w3M7J", - "_type": "visualization", - "_source": { - "title": "stocksight_stockprice", - "visState": "{\"title\":\"stocksight_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Sum of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Sum of price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Sum of 
price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Sum of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"sum\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"stocksight\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"stocksight\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": "AWZO8wUR0rkQl37xrvXV", - "_type": "visualization", - "_source": { - "title": "stocksight_tweets", - "visState": "{\"title\":\"stocksight_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 
seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"stocksight\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "AWZW6DNS0rkQl37xrvcg", - "_type": "visualization", - "_source": { - "title": "stocksight_wordcloud", - "visState": "{\n \"title\": \"stocksight_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n \"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { 
- "searchSourceJSON": "{\n \"index\": \"stocksight\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" - } - } - } -] \ No newline at end of file diff --git a/kibana_exports/amd_export.json b/kibana_exports/amd_export.json deleted file mode 100644 index c744bf8..0000000 --- a/kibana_exports/amd_export.json +++ /dev/null @@ -1,114 +0,0 @@ -[ - { - "_id": "amd-stock-dashboard", - "_type": "dashboard", - "_source": { - "title": "amd_dashboard", - "hits": 0, - "description": "", - "panelsJSON": "[{\"col\":1,\"id\":\"amd-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"amd-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"amd-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"amd-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"amd-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"amd-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", - "optionsJSON": "{\"darkTheme\":true}", - "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", - "version": 1, - "timeRestore": false, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}" - } - } - }, - { - "_id": "amd-stock-search", - "_type": "search", - "_source": { - "title": "amd_savesearch", - "description": "", - "hits": 0, - "columns": [ - "author", - "location", - "message", - "polarity", - "subjectivity", - "sentiment" - ], - 
"sort": [ - "date", - "desc" - ], - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"amd\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"amd\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": "amd-stock-visualization-1", - "_type": "visualization", - "_source": { - "title": "amd_polarity", - "visState": "{\"title\":\"amd_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", - "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", - "description": "", - "version": 1, - 
"kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "amd-stock-visualization-2", - "_type": "visualization", - "_source": { - "title": "amd_sentinel", - "visState": "{\"title\":\"amd_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "amd-stock-visualization-3", - "_type": "visualization", - "_source": { - "title": "amd_stockprice", - "visState": "{\"title\":\"amd_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of 
price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"amd\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": "amd-stock-visualization-4", - "_type": "visualization", - "_source": { - "title": "amd_tweets", - "visState": 
"{\"title\":\"amd_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"amd\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "amd-stock-visualization-5", - "_type": "visualization", - "_source": { - "title": "amd_wordcloud", - "visState": "{\n \"title\": \"amd_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n \"params\": 
{}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\n \"index\": \"amd\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" - } - } - } -] \ No newline at end of file diff --git a/kibana_exports/export.json b/kibana_exports/export.json new file mode 100644 index 0000000..7529d57 --- /dev/null +++ b/kibana_exports/export.json @@ -0,0 +1,114 @@ +[ + { + "_id": "template-stock-dashboard", + "_type": "dashboard", + "_source": { + "title": "template_dashboard", + "hits": 0, + "description": "", + "panelsJSON": "[{\"col\":1,\"id\":\"template-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"template-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"template-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"template-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"template-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"template-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", + "optionsJSON": "{\"darkTheme\":true}", + "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", + "version": 1, + "timeRestore": false, + "kibanaSavedObjectMeta": { + 
"searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}" + } + } + }, + { + "_id": "template-stock-search", + "_type": "search", + "_source": { + "title": "template_savesearch", + "description": "", + "hits": 0, + "columns": [ + "author", + "location", + "message", + "polarity", + "subjectivity", + "sentiment" + ], + "sort": [ + "date", + "desc" + ], + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"template\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"template\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "template-stock-visualization-1", + "_type": "visualization", + "_source": { + "title": "template_polarity", + "visState": "{\"title\":\"template_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to 
Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", + "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "template-stock-visualization-2", + "_type": "visualization", + "_source": { + "title": "template_sentinel", + "visState": "{\"title\":\"template_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": 
"template-stock-visualization-3", + "_type": "visualization", + "_source": { + "title": "template_stockprice", + "visState": "{\"title\":\"template_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of 
price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"template\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "template-stock-visualization-4", + "_type": "visualization", + "_source": { + "title": "template_tweets", + "visState": "{\"title\":\"template_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 
seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[]}" + } + } + }, + { + "_id": "template-stock-visualization-5", + "_type": "visualization", + "_source": { + "title": "template_wordcloud", + "visState": "{\n \"title\": \"template_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n \"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + 
"kibanaSavedObjectMeta": { + "searchSourceJSON": "{\n \"index\": \"template\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" + } + } + } +] \ No newline at end of file diff --git a/kibana_exports/tsla_export.json b/kibana_exports/tsla_export.json deleted file mode 100644 index 206a0aa..0000000 --- a/kibana_exports/tsla_export.json +++ /dev/null @@ -1,114 +0,0 @@ -[ - { - "_id": "tsla-stock-dashboard", - "_type": "dashboard", - "_source": { - "title": "tsla_dashboard", - "hits": 0, - "description": "", - "panelsJSON": "[{\"col\":1,\"id\":\"tsla-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"tsla-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"tsla-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"tsla-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"tsla-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"tsla-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", - "optionsJSON": "{\"darkTheme\":true}", - "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", - "version": 1, - "timeRestore": false, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}" - } - } - }, - { - "_id": "tsla-stock-search", - "_type": "search", - "_source": { - "title": "tsla_savesearch", - "description": "", - "hits": 0, - "columns": [ - "author", - "location", - "message", - "polarity", - 
"subjectivity", - "sentiment" - ], - "sort": [ - "date", - "desc" - ], - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tsla\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tsla\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": "tsla-stock-visualization-1", - "_type": "visualization", - "_source": { - "title": "tsla_polarity", - "visState": "{\"title\":\"tsla_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", - "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", - "description": 
"", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "tsla-stock-visualization-2", - "_type": "visualization", - "_source": { - "title": "tsla_sentinel", - "visState": "{\"title\":\"tsla_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "tsla-stock-visualization-3", - "_type": "visualization", - "_source": { - "title": "tsla_stockprice", - "visState": "{\"title\":\"tsla_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of 
price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tsla\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": "tsla-stock-visualization-4", - "_type": "visualization", - "_source": { - "title": "tsla_tweets", - "visState": 
"{\"title\":\"tsla_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tsla\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "tsla-stock-visualization-5", - "_type": "visualization", - "_source": { - "title": "tsla_wordcloud", - "visState": "{\n \"title\": \"tsla_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n 
\"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\n \"index\": \"tsla\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" - } - } - } -] \ No newline at end of file From 3cd6de0669da969e9ff95125ea5b86ccf8d988e7 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 27 Aug 2019 20:42:25 -0400 Subject: [PATCH 13/55] Change Kibana template --- kibana_exports/export.json | 52 +++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/kibana_exports/export.json b/kibana_exports/export.json index 7529d57..bdfdfa2 100644 --- a/kibana_exports/export.json +++ b/kibana_exports/export.json @@ -1,12 +1,12 @@ [ { - "_id": "template-stock-dashboard", + "_id": "tmpl-stock-dashboard", "_type": "dashboard", "_source": { - "title": "template_dashboard", + "title": "tmpl_dashboard", "hits": 0, "description": "", - "panelsJSON": 
"[{\"col\":1,\"id\":\"template-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"template-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"template-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"template-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"template-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"template-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", + "panelsJSON": "[{\"col\":1,\"id\":\"tmpl-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"tmpl-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"tmpl-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"tmpl-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"tmpl-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"tmpl-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", "optionsJSON": "{\"darkTheme\":true}", "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}}", 
"version": 1, @@ -17,10 +17,10 @@ } }, { - "_id": "template-stock-search", + "_id": "tmpl-stock-search", "_type": "search", "_source": { - "title": "template_savesearch", + "title": "tmpl_savesearch", "description": "", "hits": 0, "columns": [ @@ -37,77 +37,77 @@ ], "version": 1, "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"template\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"template\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + "searchSourceJSON": "{\"index\":\"tmpl\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tmpl\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" } } }, { - "_id": "template-stock-visualization-1", + "_id": "tmpl-stock-visualization-1", "_type": "visualization", "_source": { - "title": "template_polarity", - "visState": "{\"title\":\"template_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to 
Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", + "title": "tmpl_polarity", + "visState": "{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to 
Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", "description": "", "version": 1, "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[]}" + "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[]}" } } }, { - "_id": "template-stock-visualization-2", + "_id": "tmpl-stock-visualization-2", "_type": "visualization", "_source": { - "title": "template_sentinel", - "visState": "{\"title\":\"template_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", + "title": "tmpl_sentinel", + "visState": 
"{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", "uiStateJSON": "{}", "description": "", "version": 1, "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[]}" + "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[]}" } } }, { - "_id": "template-stock-visualization-3", + "_id": "tmpl-stock-visualization-3", "_type": "visualization", "_source": { - "title": "template_stockprice", - "visState": "{\"title\":\"template_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of 
price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", + "title": "tmpl_stockprice", + "visState": "{\"title\":\"tmpl_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of 
price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", "uiStateJSON": "{}", "description": "", "version": 1, "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"template\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tmpl\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" } } }, { - "_id": 
"template-stock-visualization-4", + "_id": "tmpl-stock-visualization-4", "_type": "visualization", "_source": { - "title": "template_tweets", - "visState": "{\"title\":\"template_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "title": "tmpl_tweets", + "visState": "{\"title\":\"tmpl_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 
seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", "uiStateJSON": "{}", "description": "", "version": 1, "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"template\",\"query\":{\"match_all\":{}},\"filter\":[]}" + "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[]}" } } }, { - "_id": "template-stock-visualization-5", + "_id": "tmpl-stock-visualization-5", "_type": "visualization", "_source": { - "title": "template_wordcloud", - "visState": "{\n \"title\": \"template_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n \"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n 
\"listeners\": {}\n}", + "title": "tmpl_wordcloud", + "visState": "{\n \"title\": \"tmpl_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n \"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", "uiStateJSON": "{}", "description": "", "version": 1, "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\n \"index\": \"template\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" + "searchSourceJSON": "{\n \"index\": \"tmpl\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" } } } From fde91817da0388fc44a91e5380e8e60221ce8f24 Mon Sep 17 00:00:00 2001 From: heyqule Date: Thu, 29 Aug 2019 02:05:32 -0400 Subject: [PATCH 14/55] Move news out of original sentiment script --- docker-compose.yml | 7 +- Python.Dockerfile => python-docker/Dockerfile | 0 .../requirements.txt | 3 +- redis-docker/Dockerfile | 3 + redis-docker/redis.conf | 1375 +++++++++++++++++ src/Helper/Sentiment.py | 130 ++ src/Initializer/ElasticSearchInit.py | 10 + src/Initializer/LoggerInit.py | 25 + src/Initializer/RedisInit.py | 4 + src/Initializer/str_unicode.py | 6 + src/NewsHeadlineListener.py | 129 ++ src/delindex.py | 18 + src/news.sentiment.py | 138 ++ src/startup.sh | 11 +- 14 files changed, 1848 insertions(+), 11 deletions(-) rename Python.Dockerfile => python-docker/Dockerfile (100%) rename requirements.txt => python-docker/requirements.txt (89%) create mode 100644 redis-docker/Dockerfile create mode 100644 redis-docker/redis.conf create mode 100644 src/Helper/Sentiment.py create mode 100644 src/Initializer/ElasticSearchInit.py create 
mode 100644 src/Initializer/LoggerInit.py create mode 100644 src/Initializer/RedisInit.py create mode 100644 src/Initializer/str_unicode.py create mode 100644 src/NewsHeadlineListener.py create mode 100644 src/delindex.py create mode 100644 src/news.sentiment.py diff --git a/docker-compose.yml b/docker-compose.yml index 35340bf..9e5eba0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,6 +24,9 @@ services: #expose this for local dev only! #ports: # - "9200:9200" + redis: + build: + context: ./redis-docker kibana: image: docker.elastic.co/kibana/kibana:5.6.16 depends_on: @@ -32,9 +35,9 @@ services: - "5601:5601" python3: build: - context: ./ - dockerfile: Python.Dockerfile + context: ./python-docker depends_on: - elasticsearch + - redis volumes: - ./src:/usr/src/app diff --git a/Python.Dockerfile b/python-docker/Dockerfile similarity index 100% rename from Python.Dockerfile rename to python-docker/Dockerfile diff --git a/requirements.txt b/python-docker/requirements.txt similarity index 89% rename from requirements.txt rename to python-docker/requirements.txt index a625eed..580cfe6 100644 --- a/requirements.txt +++ b/python-docker/requirements.txt @@ -5,4 +5,5 @@ tweepy beautifulsoup4 textblob vaderSentiment -pytz \ No newline at end of file +pytz +redis \ No newline at end of file diff --git a/redis-docker/Dockerfile b/redis-docker/Dockerfile new file mode 100644 index 0000000..4c7d407 --- /dev/null +++ b/redis-docker/Dockerfile @@ -0,0 +1,3 @@ +FROM redis:5-alpine +COPY redis.conf /usr/local/etc/redis/redis.conf +CMD [ "redis-server", "/usr/local/etc/redis/redis.conf" ] \ No newline at end of file diff --git a/redis-docker/redis.conf b/redis-docker/redis.conf new file mode 100644 index 0000000..5eab77e --- /dev/null +++ b/redis-docker/redis.conf @@ -0,0 +1,1375 @@ +# Redis configuration file example. 
+# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. + +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. +# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. +# +# include /path/to/local.conf +# include /path/to/other.conf + +################################## MODULES ##################################### + +# Load modules at startup. If the server is not able to load modules +# it will abort. It is possible to use multiple loadmodule directives. +# +# loadmodule /path/to/my_module.so +# loadmodule /path/to/other_module.so + +################################## NETWORK ##################################### + +# By default, if no "bind" configuration directive is specified, Redis listens +# for connections from all the network interfaces available on the server. 
+# It is possible to listen to just one or multiple selected interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +# bind 127.0.0.1 ::1 +# +# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the +# internet, binding to all the interfaces is dangerous and will expose the +# instance to everybody on the internet. So by default we uncomment the +# following bind directive, that will force Redis to listen only into +# the IPv4 loopback interface address (this means Redis will be able to +# accept connections only from clients running into the same computer it +# is running). +# +# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES +# JUST COMMENT THE FOLLOWING LINE. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +bind 0.0.0.0 + +# Protected mode is a layer of security protection, in order to avoid that +# Redis instances left open on the internet are accessed and exploited. +# +# When protected mode is on and if: +# +# 1) The server is not binding explicitly to a set of addresses using the +# "bind" directive. +# 2) No password is configured. +# +# The server only accepts connections from clients connecting from the +# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain +# sockets. +# +# By default protected mode is enabled. You should disable it only if +# you are sure you want clients from other hosts to connect to Redis +# even if no authentication is configured, nor a specific set of interfaces +# are explicitly listed using the "bind" directive. +protected-mode yes + +# Accept connections on the specified port, default is 6379 (IANA #815344). +# If port 0 is specified Redis will not listen on a TCP socket. +port 6379 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need an high backlog in order +# to avoid slow clients connections issues. 
Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# Unix socket. +# +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. +# +# unixsocket /tmp/redis.sock +# unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 300 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Take the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 300 seconds, which is the new +# Redis default starting with Redis 3.2.1. +tcp-keepalive 300 + +################################# GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +daemonize no + +# If you run Redis from upstart or systemd, Redis can interact with your +# supervision tree. Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting Redis into SIGSTOP mode +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready." 
+# They do not enable continuous liveness pings back to your supervisor. +supervised no + +# If a pid file is specified, Redis writes it where specified at startup +# and removes it at exit. +# +# When the server runs non daemonized, no pid file is created if none is +# specified in the configuration. When the server is daemonized, the pid file +# is used even if not specified, defaulting to "/var/run/redis.pid". +# +# Creating a pid file is best effort: if Redis is not able to create it +# nothing bad happens, the server will start and run normally. +pidfile /var/run/redis_6379.pid + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel notice + +# Specify the log file name. Also the empty string can be used to force +# Redis to log on the standard output. Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +logfile "" + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. +# syslog-ident redis + +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# syslog-facility local0 + +# Set the number of databases. The default database is DB 0, you can select +# a different one on a per-connection basis using SELECT <dbid> where +# dbid is a number between 0 and 'databases'-1 +databases 16 + +# By default Redis shows an ASCII art logo only when started to log to the +# standard output and if the standard output is a TTY. Basically this means +# that normally a logo is displayed only in interactive sessions. 
+# +# However it is possible to force the pre-4.0 behavior and always show a +# ASCII art logo in startup logs by setting the following option to yes. +always-show-logo yes + +################################ SNAPSHOTTING ################################ +# +# Save the DB on disk: +# +# save <seconds> <changes> +# +# Will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred. +# +# In the example below the behaviour will be to save: +# after 900 sec (15 min) if at least 1 key changed +# after 300 sec (5 min) if at least 10 keys changed +# after 60 sec if at least 10000 keys changed +# +# Note: you can disable saving completely by commenting out all "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" + +#save 900 1 +#save 300 10 +#save 60 10000 + +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in a hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# disaster will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usual even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + +# Compress string objects using LZF when dump .rdb databases? +# For default that's set to 'yes' as it's almost always a win. +# If you want to save some CPU in the saving child set it to 'no' but +# the dataset will likely be bigger if you have compressible values or keys. 
+rdbcompression yes + +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. +# This makes the format more resistant to corruption but there is a performance +# hit to pay (around 10%) when saving and loading RDB files, so you can disable it +# for maximum performances. +# +# RDB files created with checksum disabled have a checksum of zero that will +# tell the loading code to skip the check. +rdbchecksum yes + +# The filename where to dump the DB +dbfilename dump.rdb + +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. +# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir ./ + +################################# REPLICATION ################################# + +# Master-Replica replication. Use replicaof to make a Redis instance a copy of +# another Redis server. A few things to understand ASAP about Redis replication. +# +# +------------------+ +---------------+ +# | Master | ---> | Replica | +# | (receive writes) | | (exact copy) | +# +------------------+ +---------------+ +# +# 1) Redis replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of replicas. +# 2) Redis replicas are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition replicas automatically try to reconnect to masters +# and resynchronize with them. 
+# +# replicaof <masterip> <masterport> + +# If the master is password protected (using the "requirepass" configuration +# directive below) it is possible to tell the replica to authenticate before +# starting the replication synchronization process, otherwise the master will +# refuse the replica request. +# +# masterauth <master-password> + +# When a replica loses its connection with the master, or when the replication +# is still in progress, the replica can act in two different ways: +# +# 1) if replica-serve-stale-data is set to 'yes' (the default) the replica will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization. +# +# 2) if replica-serve-stale-data is set to 'no' the replica will reply with +# an error "SYNC with master in progress" to all kinds of commands +# except INFO, REPLICAOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, +# SUBSCRIBE, UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, +# COMMAND, POST, HOST: and LATENCY. +# +replica-serve-stale-data yes + +# You can configure a replica instance to accept writes or not. Writing against +# a replica instance may be useful to store some ephemeral data (because data +# written on a replica will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +# +# Since Redis 2.6 by default replicas are read-only. +# +# Note: read only replicas are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only replica exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve +# security of read only replicas using 'rename-command' to shadow all the +# administrative / dangerous commands. +replica-read-only yes + +# Replication SYNC strategy: disk or socket.
+# +# ------------------------------------------------------- +# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY +# ------------------------------------------------------- +# +# New replicas and reconnecting replicas that are not able to continue the replication +# process just receiving differences, need to do what is called a "full +# synchronization". An RDB file is transmitted from the master to the replicas. +# The transmission can happen in two different ways: +# +# 1) Disk-backed: The Redis master creates a new process that writes the RDB +# file on disk. Later the file is transferred by the parent +# process to the replicas incrementally. +# 2) Diskless: The Redis master creates a new process that directly writes the +# RDB file to replica sockets, without touching the disk at all. +# +# With disk-backed replication, while the RDB file is generated, more replicas +# can be queued and served with the RDB file as soon as the current child producing +# the RDB file finishes its work. With diskless replication instead once +# the transfer starts, new replicas arriving will be queued and a new transfer +# will start when the current one terminates. +# +# When diskless replication is used, the master waits a configurable amount of +# time (in seconds) before starting the transfer in the hope that multiple replicas +# will arrive and the transfer can be parallelized. +# +# With slow disks and fast (large bandwidth) networks, diskless replication +# works better. +repl-diskless-sync no + +# When diskless replication is enabled, it is possible to configure the delay +# the server waits in order to spawn the child that transfers the RDB via socket +# to the replicas. +# +# This is important since once the transfer starts, it is not possible to serve +# new replicas arriving, that will be queued for the next RDB transfer, so the server +# waits a delay in order to let more replicas arrive. +# +# The delay is specified in seconds, and by default is 5 seconds. 
To disable +# it entirely just set it to 0 seconds and the transfer will start ASAP. +repl-diskless-sync-delay 5 + +# Replicas send PINGs to server at a predefined interval. It's possible to change +# this interval with the repl-ping-replica-period option. The default value is 10 +# seconds. +# +# repl-ping-replica-period 10 + +# The following option sets the replication timeout for: +# +# 1) Bulk transfer I/O during SYNC, from the point of view of replica. +# 2) Master timeout from the point of view of replicas (data, pings). +# 3) Replica timeout from the point of view of masters (REPLCONF ACK pings). +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-replica-period otherwise a timeout will be detected +# every time there is low traffic between the master and the replica. +# +# repl-timeout 60 + +# Disable TCP_NODELAY on the replica socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to replicas. But this can add a delay for +# the data to appear on the replica side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the replica side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and replicas are many hops away, turning this to "yes" may +# be a good idea. +repl-disable-tcp-nodelay no + +# Set the replication backlog size. The backlog is a buffer that accumulates +# replica data when replicas are disconnected for some time, so that when a replica +# wants to reconnect again, often a full resync is not needed, but a partial +# resync is enough, just passing the portion of data the replica missed while +# disconnected.
+# +# The bigger the replication backlog, the longer the time the replica can be +# disconnected and later be able to perform a partial resynchronization. +# +# The backlog is only allocated once there is at least a replica connected. +# +# repl-backlog-size 1mb + +# After a master has no longer connected replicas for some time, the backlog +# will be freed. The following option configures the amount of seconds that +# need to elapse, starting from the time the last replica disconnected, for +# the backlog buffer to be freed. +# +# Note that replicas never free the backlog for timeout, since they may be +# promoted to masters later, and should be able to correctly "partially +# resynchronize" with the replicas: hence they should always accumulate backlog. +# +# A value of 0 means to never release the backlog. +# +# repl-backlog-ttl 3600 + +# The replica priority is an integer number published by Redis in the INFO output. +# It is used by Redis Sentinel in order to select a replica to promote into a +# master if the master is no longer working correctly. +# +# A replica with a low priority number is considered better for promotion, so +# for instance if there are three replicas with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the replica as not able to perform the +# role of master, so a replica with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +replica-priority 100 + +# It is possible for a master to stop accepting writes if there are less than +# N replicas connected, having a lag less or equal than M seconds. +# +# The N replicas need to be in "online" state. +# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the replica, that is usually sent every second. 
+# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough replicas +# are available, to the specified number of seconds. +# +# For example to require at least 3 replicas with a lag <= 10 seconds use: +# +# min-replicas-to-write 3 +# min-replicas-max-lag 10 +# +# Setting one or the other to 0 disables the feature. +# +# By default min-replicas-to-write is set to 0 (feature disabled) and +# min-replicas-max-lag is set to 10. + +# A Redis master is able to list the address and port of the attached +# replicas in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover replica instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP and address normally reported by a replica is obtained +# in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the replica to connect with the master. +# +# Port: The port is communicated by the replica during the replication +# handshake, and is normally the port that the replica is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the replica may be actually reachable via different IP and port +# pairs. The following two options can be used by a replica in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# replica-announce-ip 5.5.5.5 +# replica-announce-port 1234 + +################################## SECURITY ################################### + +# Require clients to issue AUTH before processing any other +# commands. 
This might be useful in environments in which you do not trust +# others with access to the host running redis-server. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since Redis is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +# requirepass foobared + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment. For instance the CONFIG command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to replicas may cause problems. + +################################### CLIENTS #################################### + +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# +# Once the limit is reached Redis will close all the new connections sending +# an error 'max number of clients reached'. +# +# maxclients 10000 + +############################## MEMORY MANAGEMENT ################################ + +# Set a memory usage limit to the specified amount of bytes. 
+# When the memory limit is reached Redis will try to remove keys +# according to the eviction policy selected (see maxmemory-policy). +# +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. +# +# This option is usually useful when using Redis as an LRU or LFU cache, or to +# set a hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have replicas attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the replicas is subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of replicas is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have replicas attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for replica +# output buffers (but this is not needed if the policy is 'noeviction'). +# +# maxmemory <bytes> + +maxmemory 128mb + +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached. You can select among eight behaviors: +# +# volatile-lru -> Evict using approximated LRU among the keys with an expire set. +# allkeys-lru -> Evict any key using approximated LRU. +# volatile-lfu -> Evict using approximated LFU among the keys with an expire set. +# allkeys-lfu -> Evict any key using approximated LFU. +# volatile-random -> Remove a random key among the ones with an expire set. +# allkeys-random -> Remove a random key, any key. +# volatile-ttl -> Remove the key with the nearest expire time (minor TTL) +# noeviction -> Don't evict anything, just return an error on write operations.
+# +# LRU means Least Recently Used +# LFU means Least Frequently Used +# +# LRU, LFU and volatile-ttl are all implemented using approximated +# randomized algorithms. +# +# Note: with any of the above policies, Redis will return an error on write +# operations, when there are no suitable keys for eviction. +# +# At the date of writing the affected write commands are: set setnx setex append +# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd +# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby +# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby +# getset mset msetnx exec sort +# +# The default is: +# +# maxmemory-policy noeviction + +maxmemory-policy volatile-lru + +# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can tune them for speed or +# accuracy. By default Redis will check five keys and pick the one that was +# used least recently, you can change the sample size using the following +# configuration directive. +# +# The default of 5 produces good enough results. 10 approximates very closely +# true LRU but costs more CPU. 3 is faster but not very accurate. +# +# maxmemory-samples 5 + +# Starting from Redis 5, by default a replica will ignore its maxmemory setting +# (unless it is promoted to master after a failover or manually). It means +# that the eviction of keys will be just handled by the master, sending the +# DEL commands to the replica as keys evict in the master side. +# +# This behavior ensures that masters and replicas stay consistent, and is usually +# what you want, however if your replica is writable, or you want the replica to have +# a different memory setting, and you are sure all the writes performed to the +# replica are idempotent, then you may change this default (but be sure to understand +# what you are doing).
+# +# Note that since the replica by default does not evict, it may end using more +# memory than the one set via maxmemory (there are certain buffers that may +# be larger on the replica, or data structures may sometimes take more memory and so +# forth). So make sure you monitor your replicas and make sure they have enough +# memory to never hit a real out-of-memory condition before the master hits +# the configured maxmemory setting. +# +# replica-ignore-maxmemory yes + +############################# LAZY FREEING #################################### + +# Redis has two primitives to delete keys. One is called DEL and is a blocking +# deletion of the object. It means that the server stops processing new commands +# in order to reclaim all the memory associated with an object in a synchronous +# way. If the key deleted is associated with a small object, the time needed +# in order to execute the DEL command is very small and comparable to most other +# O(1) or O(log_N) commands in Redis. However if the key is associated with an +# aggregated value containing millions of elements, the server can block for +# a long time (even seconds) in order to complete the operation. +# +# For the above reasons Redis also offers non blocking deletion primitives +# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and +# FLUSHDB commands, in order to reclaim memory in background. Those commands +# are executed in constant time. Another thread will incrementally free the +# object in the background as fast as possible. +# +# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. +# It's up to the design of the application to understand when it is a good +# idea to use one or the other. However the Redis server sometimes has to +# delete keys or flush the whole database as a side effect of other operations. 
+# Specifically Redis deletes objects independently of a user call in the +# following scenarios: +# +# 1) On eviction, because of the maxmemory and maxmemory policy configurations, +# in order to make room for new data, without going over the specified +# memory limit. +# 2) Because of expire: when a key with an associated time to live (see the +# EXPIRE command) must be deleted from memory. +# 3) Because of a side effect of a command that stores data on a key that may +# already exist. For example the RENAME command may delete the old key +# content when it is replaced with another one. Similarly SUNIONSTORE +# or SORT with STORE option may delete existing keys. The SET command +# itself removes any old content of the specified key in order to replace +# it with the specified string. +# 4) During replication, when a replica performs a full resynchronization with +# its master, the content of the whole database is removed in order to +# load the RDB file just transferred. +# +# In all the above cases the default is to delete objects in a blocking way, +# like if DEL was called. However you can configure each case specifically +# in order to instead release memory in a non-blocking way like if UNLINK +# was called, using the following configuration directives: + +lazyfree-lazy-eviction no +lazyfree-lazy-expire no +lazyfree-lazy-server-del no +replica-lazy-flush no + +############################## APPEND ONLY MODE ############################### + +# By default Redis asynchronously dumps the dataset on disk. This mode is +# good enough in many applications, but an issue with the Redis process or +# a power outage may result into a few minutes of writes lost (depending on +# the configured save points). +# +# The Append Only File is an alternative persistence mode that provides +# much better durability. 
For instance using the default data fsync policy +# (see later in the config file) Redis can lose just one second of writes in a +# dramatic event like a server power outage, or a single write if something +# wrong with the Redis process itself happens, but the operating system is +# still running correctly. +# +# AOF and RDB persistence can be enabled at the same time without problems. +# If the AOF is enabled on startup Redis will load the AOF, that is the file +# with the better durability guarantees. +# +# Please check http://redis.io/topics/persistence for more information. + +appendonly no + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. +# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performances (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". 
+ +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync no". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. +# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size exceeds +# the base size by the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature.
+ +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. +# This may happen when the system where Redis is running +# crashes, especially when an ext4 filesystem is mounted without the +# data=ordered option (however this can't happen when Redis itself +# crashes or aborts but the operating system still works correctly). +# +# Redis can either exit with an error when this happens, or load as much +# data as possible (the default now) and start if the AOF file is found +# to be truncated at the end. The following option controls this behavior. +# +# If aof-load-truncated is set to yes, a truncated AOF file is loaded and +# the Redis server starts, emitting a log entry to inform the user of the event. +# Otherwise if the option is set to no, the server aborts with an error +# and refuses to start. When the option is set to no, the user is required +# to fix the AOF file using the "redis-check-aof" utility before restarting +# the server. +# +# Note that if the AOF file is found to be corrupted in the middle +# the server will still exit with an error. This option only applies when +# Redis tries to read more data from the AOF file but not enough bytes +# are found. +aof-load-truncated yes + +# When rewriting the AOF file, Redis is able to use an RDB preamble in the +# AOF file for faster rewrites and recoveries. When this option is turned +# on the rewritten AOF file is composed of two different stanzas: +# +# [RDB file][AOF tail] +# +# When loading Redis recognizes that the AOF file starts with the "REDIS" +# string and loads the prefixed RDB file, and continues loading the AOF +# tail. +aof-use-rdb-preamble yes + +################################ LUA SCRIPTING ############################### + +# Max execution time of a Lua script in milliseconds.
+# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceeds the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that has not yet called write commands. The second +# is the only way to shut down the server in the case a write command was +# already issued by the script but the user doesn't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### + +# Normal Redis instances can't be part of a Redis Cluster; only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable cluster support by uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system do not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# Cluster node timeout is the amount of milliseconds a node must be unreachable +# for it to be considered in failure state. +# Most other internal time limits are multiples of the node timeout. +# +# cluster-node-timeout 15000 + +# A replica of a failing master will avoid starting a failover if its data +# looks too old.
+# +# There is no simple way for a replica to actually have an exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple replicas able to failover, they exchange messages +# in order to try to give an advantage to the replica with the best +# replication offset (more data from the master processed). +# Replicas will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single replica computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the replica will not try to failover +# at all. +# +# The point "2" can be tuned by user. Specifically a replica will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * replica-validity-factor) + repl-ping-replica-period +# +# So for example if node-timeout is 30 seconds, and the replica-validity-factor +# is 10, and assuming a default repl-ping-replica-period of 10 seconds, the +# replica will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large replica-validity-factor may allow replicas with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a replica at all. +# +# For maximum availability, it is possible to set the replica-validity-factor +# to a value of 0, which means, that replicas will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank). 
+# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-replica-validity-factor 10 + +# Cluster replicas are able to migrate to orphaned masters, that are masters +# that are left without working replicas. This improves the cluster ability +# to resist to failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working replicas. +# +# Replicas migrate to orphaned masters only if there are still at least a +# given number of other working replicas for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a replica +# will migrate only if there is at least 1 other working replica for its master +# and so forth. It usually reflects the number of replicas you want for every +# master in your cluster. +# +# Default is 1 (replicas migrate only if their masters remain with at least +# one replica). To disable migration just set it to a very large value. +# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least an hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. +# It automatically returns available as soon as all the slots are covered again. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# This option, when set to yes, prevents replicas from trying to failover its +# master during master failures. 
However the replica can still perform a +# manual failover, if forced to do so. +# +# This is useful in different scenarios, especially in the case of multiple +# data center operations, where we want one side to never be promoted except +# in the case of a total DC failure. +# +# cluster-replica-no-failover no + +# In order to set up your cluster make sure to read the documentation +# available at the http://redis.io web site. + +########################## CLUSTER DOCKER/NAT support ######################## + +# In certain deployments, Redis Cluster nodes address discovery fails, because +# addresses are NAT-ted or because ports are forwarded (the typical case is +# Docker and other containers). +# +# In order to make Redis Cluster work in such environments, a static +# configuration where each node knows its public address is needed. The +# following three options are used for this purpose, and are: +# +# * cluster-announce-ip +# * cluster-announce-port +# * cluster-announce-bus-port +# +# Each instructs the node about its address, client port, and cluster message +# bus port. The information is then published in the header of the bus packets +# so that other nodes will be able to correctly map the address of the node +# publishing the information. +# +# If the above options are not used, the normal Redis Cluster auto-detection +# will be used instead. +# +# Note that when remapped, the bus port may not be at the fixed offset of +# clients port + 10000, so you can specify any port and bus-port depending +# on how they get remapped. If the bus-port is not set, a fixed offset of +# 10000 will be used as usual. +# +# Example: +# +# cluster-announce-ip 10.1.1.5 +# cluster-announce-port 6379 +# cluster-announce-bus-port 6380 + +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time.
The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. +# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. +# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. 
Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at http://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@<db>__ prefix. +# E Keyevent events, published with __keyevent@<db>__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... +# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# A Alias for g$lshzxe, so that the "AKE" string means all the events. +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled. +# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered.
+notify-keyspace-events "" + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Lists are also encoded in a special way to save a lot of space. +# The number of entries allowed per internal list node can be specified +# as a fixed maximum size or a maximum number of elements. +# For a fixed maximum size, use -5 through -1, meaning: +# -5: max size: 64 Kb <-- not recommended for normal workloads +# -4: max size: 32 Kb <-- not recommended +# -3: max size: 16 Kb <-- probably not recommended +# -2: max size: 8 Kb <-- good +# -1: max size: 4 Kb <-- good +# Positive numbers mean store up to _exactly_ that number of elements +# per list node. +# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), +# but if your use case is unique, adjust the settings as necessary. +list-max-ziplist-size -2 + +# Lists may also be compressed. +# Compress depth is the number of quicklist ziplist nodes from *each* side of +# the list to *exclude* from compression. The head and tail of the list +# are always uncompressed for fast push/pop operations. Settings are: +# 0: disable all list compression +# 1: depth 1 means "don't start compressing until after 1 node into the list, +# going from either the head or tail" +# So: [head]->node->node->...->node->[tail] +# [head], [tail] will always be uncompressed; inner nodes will compress. +# 2: [head]->[next]->node->node->...->node->[prev]->[tail] +# 2 here means: don't compress head or head->next or tail->prev or tail, +# but compress all nodes between them. +# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] +# etc. 
+list-compress-depth 0 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + +# Streams macro node max size / items. The stream data structure is a radix +# tree of big nodes that encode multiple items inside. Using this configuration +# it is possible to configure how big a single node can be in bytes, and the +# maximum number of items it may contain before switching to a new node when +# appending new stream entries. If any of the following settings are set to +# zero, the limit is ignored, so for instance it is possible to set just a +# max entries limit by setting max-bytes to 0 and max-entries to the desired +# value.
+stream-node-max-bytes 4096 +stream-node-max-entries 100 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operations you run into a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. +# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# replica -> replica clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds> +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously).
+# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and replica clients, since +# subscribers and replicas receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit replica 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Client query buffers accumulate new commands. They are limited to a fixed +# amount by default in order to avoid that a protocol desynchronization (for +# instance due to a bug in the client) will lead to unbound memory usage in +# the query buffer. However you can configure it here if you have very special +# needs, such as huge multi/exec requests or alike. +# +# client-query-buffer-limit 1gb + +# In the Redis protocol, bulk requests, that are, elements representing single +# strings, are normally limited to 512 mb. However you can change this limit +# here. +# +# proto-max-bulk-len 512mb + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10.
Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# Normally it is useful to have an HZ value which is proportional to the +# number of clients connected. This is useful in order, for instance, to +# avoid too many clients are processed for each background task invocation +# in order to avoid latency spikes. +# +# Since the default HZ value by default is conservatively set to 10, Redis +# offers, and enables by default, the ability to use an adaptive HZ value +# which will temporarily raise when there are many connected clients. +# +# When dynamic HZ is enabled, the actual configured HZ will be used as +# a baseline, but multiples of the configured HZ value will be actually +# used as needed once more clients are connected. In this way an idle +# instance will use very little CPU time while a busy instance will be +# more responsive. +dynamic-hz yes + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +aof-rewrite-incremental-fsync yes + +# When redis saves RDB file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +rdb-save-incremental-fsync yes + +# Redis LFU eviction (see maxmemory setting) can be tuned.
However it is a good +# idea to start with the default settings and only change them after investigating +# how to improve the performances and how the keys LFU change over time, which +# is possible to inspect via the OBJECT FREQ command. +# +# There are two tunable parameters in the Redis LFU implementation: the +# counter logarithm factor and the counter decay time. It is important to +# understand what the two parameters mean before changing them. +# +# The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis +# uses a probabilistic increment with logarithmic behavior. Given the value +# of the old counter, when a key is accessed, the counter is incremented in +# this way: +# +# 1. A random number R between 0 and 1 is extracted. +# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). +# 3. The counter is incremented only if R < P. +# +# The default lfu-log-factor is 10. This is a table of how the frequency +# counter changes with a different number of accesses with different +# logarithmic factors: +# +# +--------+------------+------------+------------+------------+------------+ +# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | +# +--------+------------+------------+------------+------------+------------+ +# | 0 | 104 | 255 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 1 | 18 | 49 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 10 | 10 | 18 | 142 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 100 | 8 | 11 | 49 | 143 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# +# NOTE: The above table was obtained by running the following commands: +# +# redis-benchmark -n 1000000 incr foo +# redis-cli object freq foo +# +# NOTE 2: The counter initial value is 5 in order to give new objects a chance +# to accumulate 
hits. +# +# The counter decay time is the time, in minutes, that must elapse in order +# for the key counter to be divided by two (or decremented if it has a value +# less <= 10). +# +# The default value for the lfu-decay-time is 1. A Special value of 0 means to +# decay the counter every time it happens to be scanned. +# +# lfu-log-factor 10 +# lfu-decay-time 1 + +########################### ACTIVE DEFRAGMENTATION ####################### +# +# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested +# even in production and manually tested by multiple engineers for some +# time. +# +# What is active defragmentation? +# ------------------------------- +# +# Active (online) defragmentation allows a Redis server to compact the +# spaces left between small allocations and deallocations of data in memory, +# thus allowing to reclaim back memory. +# +# Fragmentation is a natural process that happens with every allocator (but +# less so with Jemalloc, fortunately) and certain workloads. Normally a server +# restart is needed in order to lower the fragmentation, or at least to flush +# away all the data and create it again. However thanks to this feature +# implemented by Oran Agra for Redis 4.0 this process can happen at runtime +# in an "hot" way, while the server is running. +# +# Basically when the fragmentation is over a certain level (see the +# configuration options below) Redis will start to create new copies of the +# values in contiguous memory regions by exploiting certain specific Jemalloc +# features (in order to understand if an allocation is causing fragmentation +# and to allocate it in a better place), and at the same time, will release the +# old copies of the data. This process, repeated incrementally for all the keys +# will cause the fragmentation to drop back to normal values. +# +# Important things to understand: +# +# 1. 
This feature is disabled by default, and only works if you compiled Redis +# to use the copy of Jemalloc we ship with the source code of Redis. +# This is the default with Linux builds. +# +# 2. You never need to enable this feature if you don't have fragmentation +# issues. +# +# 3. Once you experience fragmentation, you can enable this feature when +# needed with the command "CONFIG SET activedefrag yes". +# +# The configuration parameters are able to fine tune the behavior of the +# defragmentation process. If you are not sure about what they mean it is +# a good idea to leave the defaults untouched. + +# Enabled active defragmentation +# activedefrag yes + +# Minimum amount of fragmentation waste to start active defrag +# active-defrag-ignore-bytes 100mb + +# Minimum percentage of fragmentation to start active defrag +# active-defrag-threshold-lower 10 + +# Maximum percentage of fragmentation at which we use maximum effort +# active-defrag-threshold-upper 100 + +# Minimal effort for defrag in CPU percentage +# active-defrag-cycle-min 5 + +# Maximal effort for defrag in CPU percentage +# active-defrag-cycle-max 75 + +# Maximum number of set/hash/zset/list fields that will be processed from +# the main dictionary scan +# active-defrag-max-scan-fields 1000 diff --git a/src/Helper/Sentiment.py b/src/Helper/Sentiment.py new file mode 100644 index 0000000..62eca9c --- /dev/null +++ b/src/Helper/Sentiment.py @@ -0,0 +1,130 @@ +try: + import urllib.parse as urlparse +except ImportError: + import urlparse +import requests +from bs4 import BeautifulSoup +from textblob import TextBlob +from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer + +from Initializer.LoggerInit import * + + +def get_page_text(url): + + max_paragraphs = 10 + + try: + logger.debug(url) + req = requests.get(url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html_p = soup.findAll('p') + + logger.debug(html_p) + + if html_p: + n = 1 + for i in html_p: + if n <= 
max_paragraphs: + if i.string is not None: + logger.debug(i.string) + yield i.string + n += 1 + + except requests.exceptions.RequestException as re: + logger.warning("Exception: can't crawl web site (%s)" % re) + pass + + +def get_sentiment_from_url(text, sentimentURL): + payload = {'text': text} + + try: + post = requests.post(sentimentURL, data=payload) + logger.debug(post.status_code) + logger.debug(post.text) + except requests.exceptions.RequestException as re: + logger.error("Exception: requests exception getting sentiment from url caused by %s" % re) + raise + + # return None if we are getting throttled or other connection problem + if post.status_code != 200: + logger.warning("Can't get sentiment from url caused by %s %s" % (post.status_code, post.text)) + return None + + response = post.json() + logger.debug(response) + + # neg = response['probability']['neg'] + # neutral = response['probability']['neutral'] + # pos = response['probability']['pos'] + label = response['label'] + + # determine if sentiment is positive, negative, or neutral + if label == "neg": + sentiment = "negative" + elif label == "neutral": + sentiment = "neutral" + else: + sentiment = "positive" + + return sentiment + + +def sentiment_analysis(text): + """Determine if sentiment is positive, negative, or neutral + algorithm to figure out if sentiment is positive, negative or neutral + uses sentiment polarity from TextBlob, VADER Sentiment and + sentiment from text-processing URL + could be made better :) + """ + sentimentURL = 'http://text-processing.com/api/sentiment/' + # pass text into sentiment url + sentiment_url = get_sentiment_from_url(text, sentimentURL) + + # pass text into TextBlob + text_tb = TextBlob(text) + + # pass text into VADER Sentiment + analyzer = SentimentIntensityAnalyzer() + text_vs = analyzer.polarity_scores(text) + + if sentiment_url is None: + if text_tb.sentiment.polarity <= 0 and text_vs['compound'] <= -0.5: + sentiment = "negative" # very negative + elif 
text_tb.sentiment.polarity <= 0 and text_vs['compound'] <= -0.1: + sentiment = "negative" # somewhat negative + elif text_tb.sentiment.polarity == 0 and text_vs['compound'] > -0.1 and text_vs['compound'] < 0.1: + sentiment = "neutral" + elif text_tb.sentiment.polarity >= 0 and text_vs['compound'] >= 0.1: + sentiment = "positive" # somewhat positive + elif text_tb.sentiment.polarity > 0 and text_vs['compound'] >= 0.1: + sentiment = "positive" # very positive + else: + sentiment = "neutral" + else: + if text_tb.sentiment.polarity < 0 and text_vs['compound'] <= -0.1 and sentiment_url == "negative": + sentiment = "negative" # very negative + elif text_tb.sentiment.polarity <= 0 and text_vs['compound'] < 0 and sentiment_url == "neutral": + sentiment = "negative" # somewhat negative + elif text_tb.sentiment.polarity >= 0 and text_vs['compound'] > 0 and sentiment_url == "neutral": + sentiment = "positive" # somewhat positive + elif text_tb.sentiment.polarity > 0 and text_vs['compound'] >= 0.1 and sentiment_url == "positive": + sentiment = "positive" # very positive + else: + sentiment = "neutral" + + # calculate average polarity from TextBlob and VADER + polarity = (text_tb.sentiment.polarity + text_vs['compound']) / 2 + # output sentiment polarity + print("Sentiment Polarity: " + str(polarity)) + + # output sentiment subjectivity (TextBlob) + print("Sentiment Subjectivity: " + str(text_tb.sentiment.subjectivity)) + + # output sentiment + print("Sentiment (url): " + str(sentiment_url)) + print("Sentiment (algorithm): " + str(sentiment)) + + return polarity, text_tb.sentiment.subjectivity, sentiment \ No newline at end of file diff --git a/src/Initializer/ElasticSearchInit.py b/src/Initializer/ElasticSearchInit.py new file mode 100644 index 0000000..ce4c128 --- /dev/null +++ b/src/Initializer/ElasticSearchInit.py @@ -0,0 +1,10 @@ +try: + from elasticsearch5 import Elasticsearch +except ImportError: + from elasticsearch import Elasticsearch + +from config import 
elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password + +# create instance of elasticsearch +es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], + http_auth=(elasticsearch_user, elasticsearch_password)) \ No newline at end of file diff --git a/src/Initializer/LoggerInit.py b/src/Initializer/LoggerInit.py new file mode 100644 index 0000000..aa825c4 --- /dev/null +++ b/src/Initializer/LoggerInit.py @@ -0,0 +1,25 @@ +import logging + + +# set up logging +logger = logging.getLogger('stocksight') +logger.setLevel(logging.INFO) +eslogger = logging.getLogger('elasticsearch') +eslogger.setLevel(logging.WARNING) +requestslogger = logging.getLogger('requests') +requestslogger.setLevel(logging.INFO) +logging.addLevelName( + logging.INFO, "\033[1;32m%s\033[1;0m" + % logging.getLevelName(logging.INFO)) +logging.addLevelName( + logging.WARNING, "\033[1;31m%s\033[1;0m" + % logging.getLevelName(logging.WARNING)) +logging.addLevelName( + logging.ERROR, "\033[1;41m%s\033[1;0m" + % logging.getLevelName(logging.ERROR)) +logging.addLevelName( + logging.DEBUG, "\033[1;33m%s\033[1;0m" + % logging.getLevelName(logging.DEBUG)) +logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' +loglevel = logging.INFO +logging.basicConfig(format=logformatter, level=loglevel) \ No newline at end of file diff --git a/src/Initializer/RedisInit.py b/src/Initializer/RedisInit.py new file mode 100644 index 0000000..62997b5 --- /dev/null +++ b/src/Initializer/RedisInit.py @@ -0,0 +1,4 @@ +import redis +from config import redis_host, redis_port + +rds = redis.Redis(host=str(redis_host), port=redis_port, db=0) \ No newline at end of file diff --git a/src/Initializer/str_unicode.py b/src/Initializer/str_unicode.py new file mode 100644 index 0000000..bc371c6 --- /dev/null +++ b/src/Initializer/str_unicode.py @@ -0,0 +1,6 @@ +import sys + +IS_PY3 = sys.version_info >= (3, 0) + +if IS_PY3: + unicode = str \ No newline at end of file diff 
--git a/src/NewsHeadlineListener.py b/src/NewsHeadlineListener.py new file mode 100644 index 0000000..ea99ef4 --- /dev/null +++ b/src/NewsHeadlineListener.py @@ -0,0 +1,129 @@ +import re +import time +from datetime import datetime + +import nltk +import hashlib + +try: + import urllib.parse as urlparse +except ImportError: + import urlparse + +from config import * +from Initializer.str_unicode import * +from Helper.Sentiment import * +from Initializer.ElasticSearchInit import es +from Initializer.RedisInit import rds +from Initializer.LoggerInit import * + + + +class NewsHeadlineListener: + def __init__(self, symbol,url=None): + self.symbol = symbol + self.url = url + + new_headlines = self.get_news_headlines(self.url) + + # add any new headlines + for htext, htext_url in new_headlines: + + md5Hash = hashlib.md5( (htext+htext_url).encode() ).hexdigest() + if rds.exists(md5Hash): + + datenow = datetime.utcnow().isoformat() + # output news data + print("\n------------------------------") + print("Date: " + datenow) + print("News Headline: " + htext) + print("Location (url): " + htext_url) + + # create tokens of words in text using nltk + text_for_tokens = re.sub( + r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", htext) + tokens = nltk.word_tokenize(text_for_tokens.lower()) + print("NLTK Tokens: " + str(tokens)) + + # check ignored tokens from config + for t in nltk_tokens_ignored: + if t in tokens: + logger.info("Text contains token from ignore list, not adding") + continue + # check required tokens from config + tokenspass = False + + + if self.symbol in nltk_tokens_required: + nltk_tokens = nltk_tokens_required[self.symbol] + else: + nltk_tokens = nltk_tokens_required['default'] + + for t in nltk_tokens: + if t in tokens: + tokenspass = True + break + if not tokenspass: + logger.info("Text does not contain token from required list, not adding") + continue + + # get sentiment values + polarity, subjectivity, sentiment = sentiment_analysis(htext) + + 
logger.info("Adding news headline to elasticsearch") + # add news headline data and sentiment info to elasticsearch + es.index(index=self.symbol, + doc_type="newsheadline", + body={"date": datenow, + "location": htext_url, + "message": htext, + "polarity": polarity, + "subjectivity": subjectivity, + "sentiment": sentiment}) + rds.set(md5Hash,True) + + + def get_news_headlines(self, url): + + latestheadlines = [] + latestheadlines_links = [] + parsed_uri = urlparse.urljoin(url, '/') + + try: + + req = requests.get(url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html = soup.findAll('h3') + links = soup.findAll('a') + + logger.debug(html) + logger.debug(links) + + if html: + for i in html: + latestheadlines.append((unicode(i.next.next.next.next), url)) + logger.debug(latestheadlines) + + if follow_link: + if links: + for i in links: + if '/news/' in i['href']: + l = parsed_uri.rstrip('/') + i['href'] + latestheadlines_links.append(l) + + logger.debug(latestheadlines_links) + + logger.info("Following any new links and grabbing text from page...") + + for linkurl in latestheadlines_links: + for p in get_page_text(linkurl): + latestheadlines.append((unicode(p), linkurl)) + logger.debug(latestheadlines) + + except requests.exceptions.RequestException as re: + logger.warning("Exception: can't crawl web site (%s)" % re) + pass + + return latestheadlines + diff --git a/src/delindex.py b/src/delindex.py new file mode 100644 index 0000000..03b85be --- /dev/null +++ b/src/delindex.py @@ -0,0 +1,18 @@ +import argparse + +from Initializer.LoggerInit import * +from Initializer.ElasticSearchInit import es + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--delindex", action="store_true", + help="Delete existing Elasticsearch index first") + args = parser.parse_args() + + eslogger = logging.getLogger('elasticsearch') + eslogger.setLevel(logging.INFO) + + if args.delindex: + eslogger.info('Deleting existing 
Elasticsearch index ' + args.index) + es.indices.delete(index=args.index, ignore=[400, 404]) diff --git a/src/news.sentiment.py b/src/news.sentiment.py new file mode 100644 index 0000000..5e7b227 --- /dev/null +++ b/src/news.sentiment.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""sentiment.py - analyze tweets news sites and their sentiment values to +Elasticsearch. +See README.md or https://github.com/heyqule/stocksight +for more information. + +Copyright (C) Allen Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" + +import argparse +import json +import re +import sys +import time + +import nltk +import requests + +try: + import urllib.parse as urlparse +except ImportError: + import urlparse + +# import elasticsearch host, twitter keys and tokens +from NewsHeadlineListener import * + + +STOCKSIGHT_VERSION = '0.1-b.6' +__version__ = STOCKSIGHT_VERSION + +IS_PY3 = sys.version_info >= (3, 0) + +if IS_PY3: + unicode = str + + + +if __name__ == '__main__': + # parse cli args + parser = argparse.ArgumentParser() + parser.add_argument("-v", "--verbose", action="store_true", + help="Increase output verbosity") + parser.add_argument("--debug", action="store_true", + help="Debug message output") + parser.add_argument("-q", "--quiet", action="store_true", + help="Run quiet with no message output") + parser.add_argument("-V", "--version", action="version", + version="stocksight v%s" % STOCKSIGHT_VERSION, + help="Prints version and exits") + args = parser.parse_args() + + if args.verbose: + logger.setLevel(logging.INFO) + eslogger.setLevel(logging.INFO) + requestslogger.setLevel(logging.INFO) + if args.debug: + logger.setLevel(logging.DEBUG) + eslogger.setLevel(logging.DEBUG) + requestslogger.setLevel(logging.DEBUG) + if args.quiet: + logger.disabled = True + eslogger.disabled = True + requestslogger.disabled = True + + + # set up elasticsearch mappings and create index + mappings = { + 
"mappings": { + "newsheadline": { + "properties": { + "msg_id": { + "type": "string" + }, + "date": { + "type": "date" + }, + "location": { + "type": "string", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "message": { + "type": "string", + "fields": { + "english": { + "type": "string", + "analyzer": "english" + }, + "keyword": { + "type": "keyword" + } + } + }, + "polarity": { + "type": "float" + }, + "subjectivity": { + "type": "float" + }, + "sentiment": { + "type": "string", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + } + } + } + + try: + for symbol in symbols: + try: + logger.info('Creating new Elasticsearch index or using existing ' + symbol) + es.indices.create(index=symbol, body=mappings, ignore=[400, 404]) + url = "https://finance.yahoo.com/quote/%s/?p=%s" % (symbol, symbol) + + logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) + logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) + logger.info("Scraping news for %s from %s ..." 
% (symbol, url)) + + # create instance of NewsHeadlineListener + newslistener = NewsHeadlineListener(symbol, url) + except Exception as e: + logger.warning("%s" % e) + pass + + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + sys.exit(0) \ No newline at end of file diff --git a/src/startup.sh b/src/startup.sh index 4adc01b..ebb39eb 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,16 +1,11 @@ #!/bin/bash -sleep 30 -python sentiment.py -n TSLA -i tsla & -sleep 1 -python stockprice.py -s TSLA -i tsla & -sleep 1 -python sentiment.py -n AMD -i amd & -sleep 1 -python stockprice.py -s AMD -i amd & +sleep 20; while true do + #python stockprice.py -s AMD -i amd & + python news.sentiment.py & sleep 3600 done From 92a9447c03c3fe654b7a45815fff32217e71ec46 Mon Sep 17 00:00:00 2001 From: heyqule Date: Thu, 29 Aug 2019 02:56:56 -0400 Subject: [PATCH 15/55] Break down News SA Add Redis support --- docker-compose.yml | 4 ++-- src/{ => Sentiment}/Helper/Sentiment.py | 3 +-- src/Sentiment/Helper/__init__.py | 0 .../Initializer/ElasticSearchInit.py | 0 src/{ => Sentiment}/Initializer/LoggerInit.py | 0 src/{ => Sentiment}/Initializer/RedisInit.py | 0 src/Sentiment/Initializer/__init__.py | 0 .../Initializer/str_unicode.py | 0 src/{ => Sentiment}/NewsHeadlineListener.py | 22 +++++++++---------- src/Sentiment/__init__.py | 0 src/delindex.py | 4 ++-- src/news.sentiment.py | 9 +------- src/startup.sh | 2 +- 13 files changed, 18 insertions(+), 26 deletions(-) rename src/{ => Sentiment}/Helper/Sentiment.py (98%) create mode 100644 src/Sentiment/Helper/__init__.py rename src/{ => Sentiment}/Initializer/ElasticSearchInit.py (100%) rename src/{ => Sentiment}/Initializer/LoggerInit.py (100%) rename src/{ => Sentiment}/Initializer/RedisInit.py (100%) create mode 100644 src/Sentiment/Initializer/__init__.py rename src/{ => Sentiment}/Initializer/str_unicode.py (100%) rename src/{ => Sentiment}/NewsHeadlineListener.py (89%) create mode 100644 
src/Sentiment/__init__.py diff --git a/docker-compose.yml b/docker-compose.yml index 9e5eba0..943f6e8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,8 +22,8 @@ services: soft: 2048 hard: 2048 #expose this for local dev only! - #ports: - # - "9200:9200" + ports: + - "9200:9200" redis: build: context: ./redis-docker diff --git a/src/Helper/Sentiment.py b/src/Sentiment/Helper/Sentiment.py similarity index 98% rename from src/Helper/Sentiment.py rename to src/Sentiment/Helper/Sentiment.py index 62eca9c..a53a1f8 100644 --- a/src/Helper/Sentiment.py +++ b/src/Sentiment/Helper/Sentiment.py @@ -7,8 +7,7 @@ from textblob import TextBlob from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer -from Initializer.LoggerInit import * - +from Sentiment.Initializer.LoggerInit import * def get_page_text(url): diff --git a/src/Sentiment/Helper/__init__.py b/src/Sentiment/Helper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/Initializer/ElasticSearchInit.py b/src/Sentiment/Initializer/ElasticSearchInit.py similarity index 100% rename from src/Initializer/ElasticSearchInit.py rename to src/Sentiment/Initializer/ElasticSearchInit.py diff --git a/src/Initializer/LoggerInit.py b/src/Sentiment/Initializer/LoggerInit.py similarity index 100% rename from src/Initializer/LoggerInit.py rename to src/Sentiment/Initializer/LoggerInit.py diff --git a/src/Initializer/RedisInit.py b/src/Sentiment/Initializer/RedisInit.py similarity index 100% rename from src/Initializer/RedisInit.py rename to src/Sentiment/Initializer/RedisInit.py diff --git a/src/Sentiment/Initializer/__init__.py b/src/Sentiment/Initializer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/Initializer/str_unicode.py b/src/Sentiment/Initializer/str_unicode.py similarity index 100% rename from src/Initializer/str_unicode.py rename to src/Sentiment/Initializer/str_unicode.py diff --git a/src/NewsHeadlineListener.py b/src/Sentiment/NewsHeadlineListener.py 
similarity index 89% rename from src/NewsHeadlineListener.py rename to src/Sentiment/NewsHeadlineListener.py index ea99ef4..613431e 100644 --- a/src/NewsHeadlineListener.py +++ b/src/Sentiment/NewsHeadlineListener.py @@ -1,9 +1,8 @@ +import hashlib import re -import time from datetime import datetime import nltk -import hashlib try: import urllib.parse as urlparse @@ -11,12 +10,10 @@ import urlparse from config import * -from Initializer.str_unicode import * -from Helper.Sentiment import * -from Initializer.ElasticSearchInit import es -from Initializer.RedisInit import rds -from Initializer.LoggerInit import * - +from Sentiment.Initializer.ElasticSearchInit import es +from Sentiment.Initializer.str_unicode import * +from Sentiment.Initializer.RedisInit import rds +from Sentiment.Helper.Sentiment import * class NewsHeadlineListener: @@ -29,8 +26,9 @@ def __init__(self, symbol,url=None): # add any new headlines for htext, htext_url in new_headlines: - md5Hash = hashlib.md5( (htext+htext_url).encode() ).hexdigest() - if rds.exists(md5Hash): + md5_hash = hashlib.md5((htext+htext_url).encode()).hexdigest() + + if rds.exists(md5_hash) is 0: datenow = datetime.utcnow().isoformat() # output news data @@ -49,6 +47,7 @@ def __init__(self, symbol,url=None): for t in nltk_tokens_ignored: if t in tokens: logger.info("Text contains token from ignore list, not adding") + rds.set(md5_hash,1,2628000) continue # check required tokens from config tokenspass = False @@ -65,6 +64,7 @@ def __init__(self, symbol,url=None): break if not tokenspass: logger.info("Text does not contain token from required list, not adding") + rds.set(md5_hash,1,2628000) continue # get sentiment values @@ -80,7 +80,7 @@ def __init__(self, symbol,url=None): "polarity": polarity, "subjectivity": subjectivity, "sentiment": sentiment}) - rds.set(md5Hash,True) + rds.set(md5_hash,1,2628000) def get_news_headlines(self, url): diff --git a/src/Sentiment/__init__.py b/src/Sentiment/__init__.py new file mode 100644 
index 0000000..e69de29 diff --git a/src/delindex.py b/src/delindex.py index 03b85be..c80737a 100644 --- a/src/delindex.py +++ b/src/delindex.py @@ -1,7 +1,7 @@ import argparse -from Initializer.LoggerInit import * -from Initializer.ElasticSearchInit import es +from Sentiment.Initializer.ElasticSearchInit import es +from Sentiment.Initializer.LoggerInit import * if __name__ == '__main__': diff --git a/src/news.sentiment.py b/src/news.sentiment.py index 5e7b227..1e79a91 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -11,13 +11,6 @@ """ import argparse -import json -import re -import sys -import time - -import nltk -import requests try: import urllib.parse as urlparse @@ -25,7 +18,7 @@ import urlparse # import elasticsearch host, twitter keys and tokens -from NewsHeadlineListener import * +from Sentiment.NewsHeadlineListener import * STOCKSIGHT_VERSION = '0.1-b.6' diff --git a/src/startup.sh b/src/startup.sh index ebb39eb..7a9d77f 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,7 +1,7 @@ #!/bin/bash -sleep 20; +sleep 30; while true do From 4205c8c56222afa9cc7dcc212b31a2238753b376 Mon Sep 17 00:00:00 2001 From: heyqule Date: Thu, 29 Aug 2019 03:03:48 -0400 Subject: [PATCH 16/55] remove exposed ports --- docker-compose.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 943f6e8..efa376f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,11 +22,14 @@ services: soft: 2048 hard: 2048 #expose this for local dev only! - ports: - - "9200:9200" + #ports: + # - "9200:9200" redis: build: context: ./redis-docker + #expose this for local dev only! 
+ #ports: + # - "6379:6379" kibana: image: docker.elastic.co/kibana/kibana:5.6.16 depends_on: From ff5a8cf4fde0b50dfdc63f0aab959520decca3d5 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sat, 31 Aug 2019 01:07:29 -0400 Subject: [PATCH 17/55] Elasticsearch / Kibana 7.3 change --- docker-compose.yml | 16 +++++++++------- kibana_exports/export.7.3.ndjson | 6 ++++++ src/Sentiment/NewsHeadlineListener.py | 9 ++++++--- src/delindex.py | 4 ++-- src/news.sentiment.py | 8 +++----- 5 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 kibana_exports/export.7.3.ndjson diff --git a/docker-compose.yml b/docker-compose.yml index efa376f..98064de 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,9 +4,11 @@ version: '3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:5.6.16 + image: docker.elastic.co/elasticsearch/elasticsearch:7.3.1 environment: - - cluster.name=docker-cluster + - cluster.name=elasticsearch + - node.name=stockdata + - cluster.initial_master_nodes=stockdata - bootstrap.memory_lock=true - "ES_JAVA_OPTS=-Xms512m -Xmx512m" volumes: @@ -19,11 +21,11 @@ services: soft: 65536 hard: 65536 nproc: - soft: 2048 - hard: 2048 + soft: 4096 + hard: 4096 #expose this for local dev only! 
- #ports: - # - "9200:9200" + ports: + - "9200:9200" redis: build: context: ./redis-docker @@ -31,7 +33,7 @@ services: #ports: # - "6379:6379" kibana: - image: docker.elastic.co/kibana/kibana:5.6.16 + image: docker.elastic.co/kibana/kibana:7.3.1 depends_on: - elasticsearch ports: diff --git a/kibana_exports/export.7.3.ndjson b/kibana_exports/export.7.3.ndjson new file mode 100644 index 0000000..f6d0628 --- /dev/null +++ b/kibana_exports/export.7.3.ndjson @@ -0,0 +1,6 @@ +{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable
\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_tmpl"},"id":"tmpl_index_pattern","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-08-31T04:11:15.828Z","version":"WzcsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_polarity","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to 
Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":32}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50],\"customLabel\":\"\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}]}"},"id":"2faf7b70-cba6-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:17:03.654Z","version":"WzgsMV0="} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_sentinel","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"isDonut\":false,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"8a885170-cba6-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:19:36.071Z","version":"WzksMV0="} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_articles","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_articles\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}"},"id":"a775ddc0-cba6-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:20:24.604Z","version":"WzEwLDFd"} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_titles","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"}}}}","version":1,"visState":"{\"title\":\"tmpl_titles\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"customLabel\":\"Polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"message.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\",\"customLabel\":\"Subjectivity\"}}]}"},"id":"277680e0-cba9-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:39:07.986Z","version":"WzEzLDFd"} 
+{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":10,\"y\":0,\"w\":14,\"h\":15,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"title\":\"Polarity\",\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":10,\"h\":15,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"title\":\"Sentiniel\",\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":24,\"y\":0,\"w\":24,\"h\":15,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"title\":\"Article Count\",\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":0,\"y\":15,\"w\":24,\"h\":18,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"title\":\"Title Analysis\",\"panelRefName\":\"panel_3\"}]","timeRestore":false,"title":"tmpl_dashboard","version":1},"id":"eefa1870-cba9-11e9-aabf-190f41383a44","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"2faf7b70-cba6-11e9-aabf-190f41383a44","name":"panel_0","type":"visualization"},{"id":"8a885170-cba6-11e9-aabf-190f41383a44","name":"panel_1","type":"visualization"},{"id":"a775ddc0-cba6-11e9-aabf-190f41383a44","name":"panel_2","type":"visualization"},{"id":"277680e0-cba9-11e9-aabf-190f41383a44","name":"panel_3","type":"visualization"}],"type":"dashboard","updated_at":"2019-08-31T04:57:26.923Z","version":"WzI1LDFd"} \ No newline at end of file diff --git a/src/Sentiment/NewsHeadlineListener.py b/src/Sentiment/NewsHeadlineListener.py index 613431e..5d71682 100644 --- a/src/Sentiment/NewsHeadlineListener.py +++ b/src/Sentiment/NewsHeadlineListener.py @@ -27,6 +27,7 @@ def __init__(self, symbol,url=None): for htext, htext_url in new_headlines: md5_hash = hashlib.md5((htext+htext_url).encode()).hexdigest() + 
logger.debug("Hash"+md5_hash) if rds.exists(md5_hash) is 0: @@ -49,8 +50,7 @@ def __init__(self, symbol,url=None): logger.info("Text contains token from ignore list, not adding") rds.set(md5_hash,1,2628000) continue - # check required tokens from config - tokenspass = False + if self.symbol in nltk_tokens_required: @@ -58,10 +58,13 @@ def __init__(self, symbol,url=None): else: nltk_tokens = nltk_tokens_required['default'] + # check required tokens from config + tokenspass = False for t in nltk_tokens: if t in tokens: tokenspass = True break + if not tokenspass: logger.info("Text does not contain token from required list, not adding") rds.set(md5_hash,1,2628000) @@ -72,7 +75,7 @@ def __init__(self, symbol,url=None): logger.info("Adding news headline to elasticsearch") # add news headline data and sentiment info to elasticsearch - es.index(index=self.symbol, + es.index(index='stocksight_'+self.symbol, doc_type="newsheadline", body={"date": datenow, "location": htext_url, diff --git a/src/delindex.py b/src/delindex.py index c80737a..0fc9256 100644 --- a/src/delindex.py +++ b/src/delindex.py @@ -7,12 +7,12 @@ parser = argparse.ArgumentParser() parser.add_argument("-d", "--delindex", action="store_true", - help="Delete existing Elasticsearch index first") + help="Delete existing Elasticsearch index") args = parser.parse_args() eslogger = logging.getLogger('elasticsearch') eslogger.setLevel(logging.INFO) if args.delindex: - eslogger.info('Deleting existing Elasticsearch index ' + args.index) + eslogger.info('Deleting existing Elasticsearch index ' + args.delindex) es.indices.delete(index=args.index, ignore=[400, 404]) diff --git a/src/news.sentiment.py b/src/news.sentiment.py index 1e79a91..c9cec40 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -11,11 +11,8 @@ """ import argparse - -try: - import urllib.parse as urlparse -except ImportError: - import urlparse +import time +import random # import elasticsearch host, twitter keys and tokens from 
Sentiment.NewsHeadlineListener import * @@ -122,6 +119,7 @@ # create instance of NewsHeadlineListener newslistener = NewsHeadlineListener(symbol, url) + time.sleep(random.randrange(2,5)) except Exception as e: logger.warning("%s" % e) pass From e3108868c095f16c4196f2d5f20dd4c327a0e802 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sat, 31 Aug 2019 02:07:43 -0400 Subject: [PATCH 18/55] Add ndjson importer --- .../export.7.3.ndjson | 0 {kibana_exports => kibana_export}/export.json | 0 src/import.kibana.py | 35 +++++++++++++++++++ src/news.sentiment.py | 1 - src/startup.sh | 3 +- 5 files changed, 37 insertions(+), 2 deletions(-) rename {kibana_exports => kibana_export}/export.7.3.ndjson (100%) rename {kibana_exports => kibana_export}/export.json (100%) create mode 100644 src/import.kibana.py diff --git a/kibana_exports/export.7.3.ndjson b/kibana_export/export.7.3.ndjson similarity index 100% rename from kibana_exports/export.7.3.ndjson rename to kibana_export/export.7.3.ndjson diff --git a/kibana_exports/export.json b/kibana_export/export.json similarity index 100% rename from kibana_exports/export.json rename to kibana_export/export.json diff --git a/src/import.kibana.py b/src/import.kibana.py new file mode 100644 index 0000000..fbde2f9 --- /dev/null +++ b/src/import.kibana.py @@ -0,0 +1,35 @@ +import requests +import sys +import os.path +from config import symbols + +if __name__ == '__main__': + + try: + template_file = open('../kibana_export/export.7.3.ndjson', "rt", encoding='utf-8') + import_template = template_file.read() + template_file.close() + + for symbol in symbols: + try: + ndjson_file_path = '../kibana_export/'+symbol+'_exports.ndjson' + if os.path.exists(ndjson_file_path) is False: + ndjson_file = open(ndjson_file_path, "xt", encoding='utf-8') + final_text = import_template.replace('tmpl',symbol) + ndjson_file.write(final_text) + ndjson_file.close() + + kibana_import_url = 'http://kibana:5601/api/saved_objects/_import' + payload = { 'overwrite': 'false'} 
+ headers ={'kbn-xsrf': 'True'} + post = requests.post(kibana_import_url, headers=headers, files={'file': ndjson_file_path}) + print("Import %s result" % symbol) + print(post.text) + + except Exception as e: + print(e.with_traceback(e.__traceback__)); + pass + + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + sys.exit(0) \ No newline at end of file diff --git a/src/news.sentiment.py b/src/news.sentiment.py index c9cec40..46ccd8b 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -14,7 +14,6 @@ import time import random -# import elasticsearch host, twitter keys and tokens from Sentiment.NewsHeadlineListener import * diff --git a/src/startup.sh b/src/startup.sh index 7a9d77f..171e39f 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,8 +1,9 @@ #!/bin/bash - sleep 30; +python import.kibana.py + while true do #python stockprice.py -s AMD -i amd & From 10502c86a9e9f23840c5076ee21e1a7aeed31a05 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sat, 31 Aug 2019 02:19:49 -0400 Subject: [PATCH 19/55] Add ndjson importer --- kibana_export/export.7.3.ndjson | 10 +++++----- src/import.kibana.py | 7 ++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/kibana_export/export.7.3.ndjson b/kibana_export/export.7.3.ndjson index f6d0628..fe85513 100644 --- a/kibana_export/export.7.3.ndjson +++ b/kibana_export/export.7.3.ndjson @@ -1,6 +1,6 @@ 
{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"sentiment\",\"type\":\"string\",\"es
Types\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_tmpl"},"id":"tmpl_index_pattern","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-08-31T04:11:15.828Z","version":"WzcsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_polarity","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to 
Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":32}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50],\"customLabel\":\"\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}]}"},"id":"2faf7b70-cba6-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:17:03.654Z","version":"WzgsMV0="} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_sentinel","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"isDonut\":false,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"8a885170-cba6-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:19:36.071Z","version":"WzksMV0="} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_articles","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_articles\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}"},"id":"a775ddc0-cba6-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:20:24.604Z","version":"WzEwLDFd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_titles","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"}}}}","version":1,"visState":"{\"title\":\"tmpl_titles\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"customLabel\":\"Polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"message.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\",\"customLabel\":\"Subjectivity\"}}]}"},"id":"277680e0-cba9-11e9-aabf-190f41383a44","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:39:07.986Z","version":"WzEzLDFd"} 
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":10,\"y\":0,\"w\":14,\"h\":15,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"title\":\"Polarity\",\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":10,\"h\":15,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"title\":\"Sentiniel\",\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":24,\"y\":0,\"w\":24,\"h\":15,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"title\":\"Article Count\",\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":0,\"y\":15,\"w\":24,\"h\":18,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"title\":\"Title Analysis\",\"panelRefName\":\"panel_3\"}]","timeRestore":false,"title":"tmpl_dashboard","version":1},"id":"eefa1870-cba9-11e9-aabf-190f41383a44","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"2faf7b70-cba6-11e9-aabf-190f41383a44","name":"panel_0","type":"visualization"},{"id":"8a885170-cba6-11e9-aabf-190f41383a44","name":"panel_1","type":"visualization"},{"id":"a775ddc0-cba6-11e9-aabf-190f41383a44","name":"panel_2","type":"visualization"},{"id":"277680e0-cba9-11e9-aabf-190f41383a44","name":"panel_3","type":"visualization"}],"type":"dashboard","updated_at":"2019-08-31T04:57:26.923Z","version":"WzI1LDFd"} \ No newline at end of file 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_polarity","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":32}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50],\"customLabel\":\"\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}]}"},"id":"tmpl_polarity_visual","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization"
,"updated_at":"2019-08-31T04:17:03.654Z","version":"WzgsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_sentinel","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"isDonut\":false,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl_polarity_sentinel","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:19:36.071Z","version":"WzksMV0="} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_articles","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_articles\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}"},"id":"tmpl_articles","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:20:24.604Z","version":"WzEwLDFd"} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_titles","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"}}}}","version":1,"visState":"{\"title\":\"tmpl_titles\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"customLabel\":\"Polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"message.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\",\"customLabel\":\"Subjectivity\"}}]}"},"id":"tmpl_titltes","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:39:07.986Z","version":"WzEzLDFd"} 
+{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":10,\"y\":0,\"w\":14,\"h\":15,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"title\":\"Polarity\",\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":10,\"h\":15,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"title\":\"Sentiniel\",\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":24,\"y\":0,\"w\":24,\"h\":15,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"title\":\"Article Count\",\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":0,\"y\":15,\"w\":24,\"h\":18,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"title\":\"Title Analysis\",\"panelRefName\":\"panel_3\"}]","timeRestore":false,"title":"tmpl_dashboard","version":1},"id":"tmpl_dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl_polarity_visual","name":"panel_0","type":"visualization"},{"id":"tmpl_polarity_sentinel","name":"panel_1","type":"visualization"},{"id":"tmpl_articles","name":"panel_2","type":"visualization"},{"id":"tmpl_titltes","name":"panel_3","type":"visualization"}],"type":"dashboard","updated_at":"2019-08-31T04:57:26.923Z","version":"WzI1LDFd"} \ No newline at end of file diff --git a/src/import.kibana.py b/src/import.kibana.py index fbde2f9..cc48da9 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -22,12 +22,13 @@ kibana_import_url = 'http://kibana:5601/api/saved_objects/_import' payload = { 'overwrite': 'false'} headers ={'kbn-xsrf': 'True'} - post = requests.post(kibana_import_url, headers=headers, files={'file': ndjson_file_path}) - print("Import %s result" % symbol) + post = requests.request('POST',kibana_import_url, 
headers=headers, files={'file': open(ndjson_file_path, "rt", encoding='utf-8')}) + print("Import %s Kibana Dashboard" % symbol) + print(ndjson_file_path) print(post.text) except Exception as e: - print(e.with_traceback(e.__traceback__)); + print(e); pass except KeyboardInterrupt: From 76751541be5dd69f1d72184d57e477fa043f9db7 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sat, 31 Aug 2019 02:40:19 -0400 Subject: [PATCH 20/55] Remove kibana 5.6 export --- kibana_export/export.json | 114 -------------------------------------- 1 file changed, 114 deletions(-) delete mode 100644 kibana_export/export.json diff --git a/kibana_export/export.json b/kibana_export/export.json deleted file mode 100644 index bdfdfa2..0000000 --- a/kibana_export/export.json +++ /dev/null @@ -1,114 +0,0 @@ -[ - { - "_id": "tmpl-stock-dashboard", - "_type": "dashboard", - "_source": { - "title": "tmpl_dashboard", - "hits": 0, - "description": "", - "panelsJSON": "[{\"col\":1,\"id\":\"tmpl-stock-visualization-2\",\"panelIndex\":1,\"row\":3,\"size_x\":3,\"size_y\":3,\"type\":\"visualization\"},{\"col\":9,\"id\":\"tmpl-stock-visualization-4\",\"panelIndex\":2,\"row\":3,\"size_x\":4,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"columns\":[\"author\",\"location\",\"message\",\"polarity\",\"subjectivity\",\"sentiment\"],\"id\":\"tmpl-stock-search\",\"panelIndex\":3,\"row\":6,\"size_x\":12,\"size_y\":4,\"sort\":[\"date\",\"desc\"],\"type\":\"search\"},{\"col\":1,\"id\":\"tmpl-stock-visualization-5\",\"panelIndex\":4,\"row\":10,\"size_x\":12,\"size_y\":4,\"type\":\"visualization\"},{\"col\":4,\"id\":\"tmpl-stock-visualization-3\",\"panelIndex\":5,\"row\":3,\"size_x\":5,\"size_y\":3,\"type\":\"visualization\"},{\"col\":1,\"id\":\"tmpl-stock-visualization-1\",\"panelIndex\":6,\"row\":1,\"size_x\":12,\"size_y\":2,\"type\":\"visualization\"}]", - "optionsJSON": "{\"darkTheme\":true}", - "uiStateJSON": "{\"P-2\":{\"vis\":{\"legendOpen\":true}},\"P-6\":{\"vis\":{\"defaultColors\":{\"0 - 
1\":\"rgb(0,104,55)\"}}}}", - "version": 1, - "timeRestore": false, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"filter\":[{\"query\":{\"match_all\":{}}}],\"highlightAll\":true,\"version\":true}" - } - } - }, - { - "_id": "tmpl-stock-search", - "_type": "search", - "_source": { - "title": "tmpl_savesearch", - "description": "", - "hits": 0, - "columns": [ - "author", - "location", - "message", - "polarity", - "subjectivity", - "sentiment" - ], - "sort": [ - "date", - "desc" - ], - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tmpl\",\"highlightAll\":true,\"version\":true,\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tmpl\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"tweet\"},\"query\":{\"match\":{\"_type\":{\"query\":\"tweet\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": "tmpl-stock-visualization-1", - "_type": "visualization", - "_source": { - "title": "tmpl_polarity", - "visState": "{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Metric\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to 
Red\",\"gaugeColorMode\":\"None\",\"useRange\":false,\"colorsRange\":[{\"from\":0,\"to\":1}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":false,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"simple\",\"style\":{\"fontSize\":\"24\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\"},\"extendRange\":false}},\"aggs\":[{\"id\":\"5\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50]}},{\"id\":\"2\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}],\"listeners\":{}}", - "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 1\":\"rgb(0,104,55)\"}}}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "tmpl-stock-visualization-2", - "_type": "visualization", - "_source": { - "title": "tmpl_sentinel", - "visState": "{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"addLegend\":true,\"addTooltip\":true,\"isDonut\":false,\"legendPosition\":\"bottom\",\"type\":\"pie\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "tmpl-stock-visualization-3", - 
"_type": "visualization", - "_source": { - "title": "tmpl_stockprice", - "visState": "{\"title\":\"tmpl_stockprice\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Avg of price_last\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Avg of price_last\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"3\",\"label\":\"Avg of price_high\"},\"valueAxis\":\"ValueAxis-1\"},{\"show\":true,\"mode\":\"normal\",\"type\":\"line\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"data\":{\"id\":\"4\",\"label\":\"Avg of 
price_low\"},\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_high\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_low\"}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[{\"meta\":{\"index\":\"tmpl\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"stock\"},\"query\":{\"match\":{\"_type\":{\"query\":\"stock\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" - } - } - }, - { - "_id": "tmpl-stock-visualization-4", - "_type": "visualization", - "_source": { - "title": "tmpl_tweets", - "visState": "{\"title\":\"tmpl_tweets\",\"type\":\"line\",\"params\":{\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{\"text\":\"date per 30 
seconds\"}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"bottom\",\"times\":[],\"addTimeMarker\":false,\"type\":\"line\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - "searchSourceJSON": "{\"index\":\"tmpl\",\"query\":{\"match_all\":{}},\"filter\":[]}" - } - } - }, - { - "_id": "tmpl-stock-visualization-5", - "_type": "visualization", - "_source": { - "title": "tmpl_wordcloud", - "visState": "{\n \"title\": \"tmpl_wordcloud\",\n \"type\": \"tagcloud\",\n \"params\": {\n \"scale\": \"linear\",\n \"orientation\": \"single\",\n \"minFontSize\": 14,\n \"maxFontSize\": 36,\n \"type\": \"tagcloud\"\n },\n \"aggs\": [\n {\n \"id\": \"1\",\n \"enabled\": true,\n \"type\": \"count\",\n \"schema\": \"metric\",\n \"params\": {}\n },\n {\n \"id\": \"2\",\n \"enabled\": true,\n \"type\": \"terms\",\n \"schema\": \"segment\",\n \"params\": {\n \"field\": \"message.keyword\",\n \"size\": 25,\n \"order\": \"desc\",\n \"orderBy\": \"1\"\n }\n }\n ],\n \"listeners\": {}\n}", - "uiStateJSON": "{}", - "description": "", - "version": 1, - "kibanaSavedObjectMeta": { - 
"searchSourceJSON": "{\n \"index\": \"tmpl\",\n \"query\": {\n \"match_all\": {}\n },\n \"filter\": []\n}" - } - } - } -] \ No newline at end of file From c2a70107b127abf8501c5f85c7bbe8a92baa981c Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 2 Sep 2019 00:30:09 -0400 Subject: [PATCH 21/55] Fix kibana importer Add redis cache Drop support for unicode() Refactor StockPrice --- docker-compose.yml | 8 +- src/Sentiment/Initializer/str_unicode.py | 6 - .../Helper/Sentiment.py | 2 +- .../Helper/__init__.py | 0 .../Initializer/ElasticSearch.py} | 0 .../Initializer/Logger.py} | 0 .../Initializer/Redis.py} | 0 .../Initializer/__init__.py | 0 .../NewsHeadlineListener.py | 18 +- src/StockSight/StockPriceListener.py | 103 ++++++++++ src/{Sentiment => StockSight}/__init__.py | 0 src/delindex.py | 4 +- src/import.kibana.py | 4 +- .../kibana_export}/export.7.3.ndjson | 0 src/news.sentiment.py | 12 +- src/startup.sh | 23 ++- src/stockprice.py | 177 ++---------------- 17 files changed, 158 insertions(+), 199 deletions(-) delete mode 100644 src/Sentiment/Initializer/str_unicode.py rename src/{Sentiment => StockSight}/Helper/Sentiment.py (98%) rename src/{Sentiment => StockSight}/Helper/__init__.py (100%) rename src/{Sentiment/Initializer/ElasticSearchInit.py => StockSight/Initializer/ElasticSearch.py} (100%) rename src/{Sentiment/Initializer/LoggerInit.py => StockSight/Initializer/Logger.py} (100%) rename src/{Sentiment/Initializer/RedisInit.py => StockSight/Initializer/Redis.py} (100%) rename src/{Sentiment => StockSight}/Initializer/__init__.py (100%) rename src/{Sentiment => StockSight}/NewsHeadlineListener.py (89%) create mode 100644 src/StockSight/StockPriceListener.py rename src/{Sentiment => StockSight}/__init__.py (100%) rename {kibana_export => src/kibana_export}/export.7.3.ndjson (100%) diff --git a/docker-compose.yml b/docker-compose.yml index 98064de..e97029d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,15 +23,15 @@ services: nproc: soft: 4096 hard: 
4096 - #expose this for local dev only! + #expose this port for local dev only! ports: - "9200:9200" redis: build: context: ./redis-docker - #expose this for local dev only! - #ports: - # - "6379:6379" + #expose this port for local dev only! + ports: + - "6379:6379" kibana: image: docker.elastic.co/kibana/kibana:7.3.1 depends_on: diff --git a/src/Sentiment/Initializer/str_unicode.py b/src/Sentiment/Initializer/str_unicode.py deleted file mode 100644 index bc371c6..0000000 --- a/src/Sentiment/Initializer/str_unicode.py +++ /dev/null @@ -1,6 +0,0 @@ -import sys - -IS_PY3 = sys.version_info >= (3, 0) - -if IS_PY3: - unicode = str \ No newline at end of file diff --git a/src/Sentiment/Helper/Sentiment.py b/src/StockSight/Helper/Sentiment.py similarity index 98% rename from src/Sentiment/Helper/Sentiment.py rename to src/StockSight/Helper/Sentiment.py index a53a1f8..afd8ac8 100644 --- a/src/Sentiment/Helper/Sentiment.py +++ b/src/StockSight/Helper/Sentiment.py @@ -7,7 +7,7 @@ from textblob import TextBlob from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer -from Sentiment.Initializer.LoggerInit import * +from StockSight.Initializer.Logger import * def get_page_text(url): diff --git a/src/Sentiment/Helper/__init__.py b/src/StockSight/Helper/__init__.py similarity index 100% rename from src/Sentiment/Helper/__init__.py rename to src/StockSight/Helper/__init__.py diff --git a/src/Sentiment/Initializer/ElasticSearchInit.py b/src/StockSight/Initializer/ElasticSearch.py similarity index 100% rename from src/Sentiment/Initializer/ElasticSearchInit.py rename to src/StockSight/Initializer/ElasticSearch.py diff --git a/src/Sentiment/Initializer/LoggerInit.py b/src/StockSight/Initializer/Logger.py similarity index 100% rename from src/Sentiment/Initializer/LoggerInit.py rename to src/StockSight/Initializer/Logger.py diff --git a/src/Sentiment/Initializer/RedisInit.py b/src/StockSight/Initializer/Redis.py similarity index 100% rename from 
src/Sentiment/Initializer/RedisInit.py rename to src/StockSight/Initializer/Redis.py diff --git a/src/Sentiment/Initializer/__init__.py b/src/StockSight/Initializer/__init__.py similarity index 100% rename from src/Sentiment/Initializer/__init__.py rename to src/StockSight/Initializer/__init__.py diff --git a/src/Sentiment/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py similarity index 89% rename from src/Sentiment/NewsHeadlineListener.py rename to src/StockSight/NewsHeadlineListener.py index 5d71682..3fdadb0 100644 --- a/src/Sentiment/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -10,16 +10,16 @@ import urlparse from config import * -from Sentiment.Initializer.ElasticSearchInit import es -from Sentiment.Initializer.str_unicode import * -from Sentiment.Initializer.RedisInit import rds -from Sentiment.Helper.Sentiment import * +from StockSight.Initializer.ElasticSearch import es +from StockSight.Initializer.Redis import rds +from StockSight.Helper.Sentiment import * class NewsHeadlineListener: def __init__(self, symbol,url=None): self.symbol = symbol self.url = url + self.cache_length = 2628000 new_headlines = self.get_news_headlines(self.url) @@ -48,7 +48,7 @@ def __init__(self, symbol,url=None): for t in nltk_tokens_ignored: if t in tokens: logger.info("Text contains token from ignore list, not adding") - rds.set(md5_hash,1,2628000) + rds.set(md5_hash,1,self.cache_length) continue @@ -67,7 +67,7 @@ def __init__(self, symbol,url=None): if not tokenspass: logger.info("Text does not contain token from required list, not adding") - rds.set(md5_hash,1,2628000) + rds.set(md5_hash,1,self.cache_length) continue # get sentiment values @@ -83,7 +83,7 @@ def __init__(self, symbol,url=None): "polarity": polarity, "subjectivity": subjectivity, "sentiment": sentiment}) - rds.set(md5_hash,1,2628000) + rds.set(md5_hash,1,self.cache_length) def get_news_headlines(self, url): @@ -105,7 +105,7 @@ def get_news_headlines(self, url): if html: 
for i in html: - latestheadlines.append((unicode(i.next.next.next.next), url)) + latestheadlines.append((str(i.next.next.next.next), url)) logger.debug(latestheadlines) if follow_link: @@ -121,7 +121,7 @@ def get_news_headlines(self, url): for linkurl in latestheadlines_links: for p in get_page_text(linkurl): - latestheadlines.append((unicode(p), linkurl)) + latestheadlines.append((str(p), linkurl)) logger.debug(latestheadlines) except requests.exceptions.RequestException as re: diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py new file mode 100644 index 0000000..3e31581 --- /dev/null +++ b/src/StockSight/StockPriceListener.py @@ -0,0 +1,103 @@ +import time +import datetime +import re +import requests +from pytz import timezone + +from config import weekday_start, weekday_end, hour_start, hour_end, timezone_str +from StockSight.Initializer.Logger import logger +from StockSight.Initializer.ElasticSearch import es + +regex = re + +class StockPriceListener: + + def get_price(self, symbol): + url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" + eastern_timezone = timezone(timezone_str) + + if self.isNotLive(eastern_timezone): + today = datetime.datetime.now(eastern_timezone) + logger.info("Stock market is not live. Current time: %s" % today.strftime("%Y-%m-%d %H:%M")) + return self; + + + + logger.info("Grabbing stock data for symbol %s..." 
% symbol) + + try: + + # add stock symbol to url + url = regex.sub("SYMBOL", symbol, url) + # get stock data (json) from url + try: + r = requests.get(url) + data = r.json() + except (requests.HTTPError, requests.ConnectionError, requests.ConnectTimeout) as re: + logger.error("Exception: exception getting stock data from url caused by %s" % re) + raise + logger.debug(data) + # build dict to store stock info + try: + D = {} + D['symbol'] = symbol + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] + if D['last'] is None: + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] + D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) + try: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 + except TypeError: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 + pass + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] + if D['high'] is None: + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-2] + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-1] + if D['low'] is None: + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-2] + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] + if D['vol'] is None: + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] + logger.debug(D) + except KeyError as e: + logger.error("Exception: exception getting stock data caused by %s" % e) + raise + + # check before adding to ES + if D['last'] is not None and D['high'] is not None and D['low'] is not None: + 
logger.info("Adding stock data to Elasticsearch...") + # add stock price info to elasticsearch + es.index(index=symbol, + doc_type="stock", + body={"symbol": D['symbol'], + "price_last": D['last'], + "date": D['date'], + "change": D['change'], + "price_high": D['high'], + "price_low": D['low'], + "vol": D['vol'] + }) + else: + logger.warning("Some stock data had null values, not adding to Elasticsearch") + + except Exception as e: + logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) + pass + + return self; + + + def isNotLive(self, timezone): + today = datetime.datetime.now(timezone); + if today.weekday() >= weekday_start and \ + today.weekday() <= weekday_end and \ + today.hour >= hour_start and \ + today.hour <= hour_end: + return False; + + return True; diff --git a/src/Sentiment/__init__.py b/src/StockSight/__init__.py similarity index 100% rename from src/Sentiment/__init__.py rename to src/StockSight/__init__.py diff --git a/src/delindex.py b/src/delindex.py index 0fc9256..03ac084 100644 --- a/src/delindex.py +++ b/src/delindex.py @@ -1,7 +1,7 @@ import argparse -from Sentiment.Initializer.ElasticSearchInit import es -from Sentiment.Initializer.LoggerInit import * +from StockSight.Initializer.ElasticSearch import es +from StockSight.Initializer.Logger import * if __name__ == '__main__': diff --git a/src/import.kibana.py b/src/import.kibana.py index cc48da9..e6c3ffa 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -6,13 +6,13 @@ if __name__ == '__main__': try: - template_file = open('../kibana_export/export.7.3.ndjson', "rt", encoding='utf-8') + template_file = open('kibana_export/export.7.3.ndjson', "rt", encoding='utf-8') import_template = template_file.read() template_file.close() for symbol in symbols: try: - ndjson_file_path = '../kibana_export/'+symbol+'_exports.ndjson' + ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' if os.path.exists(ndjson_file_path) is False: ndjson_file = 
open(ndjson_file_path, "xt", encoding='utf-8') final_text = import_template.replace('tmpl',symbol) diff --git a/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson similarity index 100% rename from kibana_export/export.7.3.ndjson rename to src/kibana_export/export.7.3.ndjson diff --git a/src/news.sentiment.py b/src/news.sentiment.py index 46ccd8b..678c3a6 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -11,21 +11,14 @@ """ import argparse -import time -import random +import sys -from Sentiment.NewsHeadlineListener import * +from StockSight.NewsHeadlineListener import * STOCKSIGHT_VERSION = '0.1-b.6' __version__ = STOCKSIGHT_VERSION -IS_PY3 = sys.version_info >= (3, 0) - -if IS_PY3: - unicode = str - - if __name__ == '__main__': # parse cli args @@ -118,7 +111,6 @@ # create instance of NewsHeadlineListener newslistener = NewsHeadlineListener(symbol, url) - time.sleep(random.randrange(2,5)) except Exception as e: logger.warning("%s" % e) pass diff --git a/src/startup.sh b/src/startup.sh index 171e39f..4c56b59 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,12 +1,29 @@ #!/bin/bash + sleep 30; +echo "Copy kibana dashboard if they don't exist"; python import.kibana.py +tick_time=900 +tick=0 +let sentiment_time=900*4 while true do - #python stockprice.py -s AMD -i amd & - python news.sentiment.py & - sleep 3600 + echo "Spawning stock price receiver instance"; + python stockprice.py & + echo "Will get stock data again in ${tick_time} sec..."; + let tick_mod=tick%4 + + if [ $tick_mod -eq 0 ] + then + echo "Spawning news sentiment receiver instance"; + python news.sentiment.py & + echo "Will get sentiment data again in ${sentiment_time} sec..."; + let tick=0; + fi + + sleep $tick_time + let tick++ done diff --git a/src/stockprice.py b/src/stockprice.py index dba072d..4d6c68e 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -19,123 +19,14 @@ import requests from pytz import timezone -try: - from elasticsearch5 import 
Elasticsearch -except ImportError: - from elasticsearch import Elasticsearch -from random import randint - -# import elasticsearch host -from config import elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password, \ - price_frequency, weekday_start, weekday_end, hour_start, hour_end, timezone_str +from config import symbols +from StockSight.Initializer.ElasticSearch import es +from StockSight.StockPriceListener import StockPriceListener STOCKSIGHT_VERSION = '0.1-b.5' __version__ = STOCKSIGHT_VERSION -# url to fetch stock price from, SYMBOL will be replaced with symbol from cli args -url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" - -# create instance of elasticsearch -es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], - http_auth=(elasticsearch_user, elasticsearch_password)) - -regex = re - -class GetStock: - - def get_price(self, url, symbol): - - eastern_timezone = timezone(timezone_str) - - while True: - - if self.isNotLive(eastern_timezone): - today = datetime.datetime.now(eastern_timezone) - logger.info("Stock market is not live. Current time: %s" % today.strftime("%Y-%m-%d %H:%M")) - logger.info("Will get stock data again in %s sec..." % args.frequency) - time.sleep(args.frequency) - continue - - - logger.info("Grabbing stock data for symbol %s..." 
% symbol) - - try: - - # add stock symbol to url - url = regex.sub("SYMBOL", symbol, url) - # get stock data (json) from url - try: - r = requests.get(url) - data = r.json() - except (requests.HTTPError, requests.ConnectionError, requests.ConnectTimeout) as re: - logger.error("Exception: exception getting stock data from url caused by %s" % re) - raise - logger.debug(data) - # build dict to store stock info - try: - D = {} - D['symbol'] = symbol - D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - if D['last'] is None: - D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) - try: - D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - - data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ - data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 - except TypeError: - D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - - data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ - data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 - pass - D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] - if D['high'] is None: - D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-2] - D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-1] - if D['low'] is None: - D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-2] - D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] - if D['vol'] is None: - D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] - logger.debug(D) - except KeyError as e: - logger.error("Exception: exception getting stock data caused by %s" % e) - raise - - # check before adding to ES - if D['last'] is not None and D['high'] is not None and D['low'] is not None: - 
logger.info("Adding stock data to Elasticsearch...") - # add stock price info to elasticsearch - es.index(index=args.index, - doc_type="stock", - body={"symbol": D['symbol'], - "price_last": D['last'], - "date": D['date'], - "change": D['change'], - "price_high": D['high'], - "price_low": D['low'], - "vol": D['vol'] - }) - else: - logger.warning("Some stock data had null values, not adding to Elasticsearch") - - except Exception as e: - logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) - pass - - logger.info("Will get stock data again in %s sec..." % args.frequency) - time.sleep(args.frequency) - - def isNotLive(self, timezone): - today = datetime.datetime.now(timezone); - if today.weekday() >= weekday_start and \ - today.weekday() <= weekday_end and \ - today.hour >= hour_start and \ - today.hour <= hour_end: - return False; - - return True; @@ -143,14 +34,7 @@ def isNotLive(self, timezone): # parse cli args parser = argparse.ArgumentParser() - parser.add_argument("-i", "--index", metavar="INDEX", default="stocksight", - help="Index name for Elasticsearch (default: stocksight)") - parser.add_argument("-d", "--delindex", action="store_true", - help="Delete existing Elasticsearch index first") - parser.add_argument("-s", "--symbol", metavar="SYMBOL", - help="Stock symbol to use, example: TSLA") - parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=price_frequency, type=int, - help="How often in seconds to retrieve stock data (default: %d sec)" % price_frequency) + parser.add_argument("-v", "--verbose", action="store_true", help="Increase output verbosity") parser.add_argument("--debug", action="store_true", @@ -197,34 +81,6 @@ def isNotLive(self, timezone): eslogger.disabled = True requestslogger.disabled = True - # print banner - if not args.quiet: - c = randint(1, 4) - if c == 1: - color = '31m' - elif c == 2: - color = '32m' - elif c == 3: - color = '33m' - elif c == 4: - color = '35m' - - banner = """\033[%s 
- - /$$ /$$ /$$ /$$ /$$ - | $$ | $$ |__/ | $$ | $$ - /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ - /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ - | $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ - \____ $$ | $$ /$$| $$ | $$| $$ | $$_ $$ \____ $$| $$| $$ | $$| $$ | $$ | $$ /$$ - /$$$$$$$/ | $$$$/| $$$$$$/| $$$$$$$| $$ \ $$ /$$$$$$$/| $$| $$$$$$$| $$ | $$ | $$$$/ - |_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ - /$$ \ $$ - :) = +$ :( = -$ | $$$$$$/ - \______/ v%s - \033[0m""" % (color, STOCKSIGHT_VERSION) - print(banner + '\n') - # set up elasticsearch mappings and create index mappings = { "mappings": { @@ -256,23 +112,20 @@ def isNotLive(self, timezone): } } - if args.symbol is None: - print("No stock symbol, see -h for help.") - sys.exit(1) - - if args.delindex: - logger.info('Deleting existing Elasticsearch index ' + args.index) - es.indices.delete(index=args.index, ignore=[400, 404]) - - logger.info('Creating new Elasticsearch index or using existing ' + args.index) - es.indices.create(index=args.index, body=mappings, ignore=[400, 404]) + try: + for symbol in symbols: + try: + logger.info('Creating new Elasticsearch index or using existing ' + symbol) + es.indices.create(index=symbol, body=mappings, ignore=[400, 404]) - # create instance of GetStock - stockprice = GetStock() + stockprice = StockPriceListener() - try: + stockprice.get_price(symbol=symbol) + except Exception as e: + logger.warning("%s" % e) + pass # get stock price - stockprice.get_price(symbol=args.symbol, url=url) + except Exception as e: logger.warning("Exception: Failed to get stock data caused by: %s" % e) except KeyboardInterrupt: From cb42d10c9acd491a8047d69572e137e3885e060c Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 2 Sep 2019 02:04:21 -0400 Subject: [PATCH 22/55] Update Copyright --- python-docker/Dockerfile | 2 + redis-docker/Dockerfile | 1 + 
src/StockSight/Helper/Sentiment.py | 12 + src/StockSight/Initializer/ElasticSearch.py | 13 + src/StockSight/Initializer/Logger.py | 13 + src/StockSight/Initializer/Redis.py | 11 + src/StockSight/NewsHeadlineListener.py | 12 + src/StockSight/StockPriceListener.py | 12 + src/StockSight/TweetListener.py | 216 +++++++++++++++++ src/config.sample.py | 21 +- src/delindex.py | 12 + src/import.kibana.py | 11 + src/news.sentiment.py | 7 +- src/{sentiment.py => sentiment.og.py} | 86 ++----- src/startup.sample.sh | 16 -- src/startup.sh | 12 +- src/stockprice.og.py | 254 ++++++++++++++++++++ src/stockprice.py | 2 +- src/tweet.sentiment.py | 234 ++++++++++++++++++ 19 files changed, 858 insertions(+), 89 deletions(-) create mode 100644 src/StockSight/TweetListener.py rename src/{sentiment.py => sentiment.og.py} (94%) delete mode 100755 src/startup.sample.sh create mode 100644 src/stockprice.og.py create mode 100644 src/tweet.sentiment.py diff --git a/python-docker/Dockerfile b/python-docker/Dockerfile index 80569e0..42557c2 100644 --- a/python-docker/Dockerfile +++ b/python-docker/Dockerfile @@ -1,5 +1,7 @@ FROM python:3-alpine +LABEL maintainer="Allen (Jian Feng) Xie" + WORKDIR /usr/src/app ADD requirements.txt ./requirements.txt diff --git a/redis-docker/Dockerfile b/redis-docker/Dockerfile index 4c7d407..8f890d7 100644 --- a/redis-docker/Dockerfile +++ b/redis-docker/Dockerfile @@ -1,3 +1,4 @@ FROM redis:5-alpine +LABEL maintainer="Allen (Jian Feng) Xie" COPY redis.conf /usr/local/etc/redis/redis.conf CMD [ "redis-server", "/usr/local/etc/redis/redis.conf" ] \ No newline at end of file diff --git a/src/StockSight/Helper/Sentiment.py b/src/StockSight/Helper/Sentiment.py index afd8ac8..4de6756 100644 --- a/src/StockSight/Helper/Sentiment.py +++ b/src/StockSight/Helper/Sentiment.py @@ -1,3 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. 
+See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" try: import urllib.parse as urlparse except ImportError: diff --git a/src/StockSight/Initializer/ElasticSearch.py b/src/StockSight/Initializer/ElasticSearch.py index ce4c128..30cb0e7 100644 --- a/src/StockSight/Initializer/ElasticSearch.py +++ b/src/StockSight/Initializer/ElasticSearch.py @@ -1,3 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" + try: from elasticsearch5 import Elasticsearch except ImportError: diff --git a/src/StockSight/Initializer/Logger.py b/src/StockSight/Initializer/Logger.py index aa825c4..0f045d9 100644 --- a/src/StockSight/Initializer/Logger.py +++ b/src/StockSight/Initializer/Logger.py @@ -1,3 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" + import logging diff --git a/src/StockSight/Initializer/Redis.py b/src/StockSight/Initializer/Redis.py index 62997b5..cdd1b2c 100644 --- a/src/StockSight/Initializer/Redis.py +++ b/src/StockSight/Initializer/Redis.py @@ -1,3 +1,14 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. 
+See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import redis from config import redis_host, redis_port diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 3fdadb0..d534b7e 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -1,3 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import hashlib import re from datetime import datetime diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index 3e31581..9eca371 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -1,3 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import time import datetime import re diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py new file mode 100644 index 0000000..30db6db --- /dev/null +++ b/src/StockSight/TweetListener.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. 
+ +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" + +import json +import re +import time + +import nltk + +from config import * +from StockSight.Initializer.ElasticSearch import es +from StockSight.Helper.Sentiment import * + +from tweepy.streaming import StreamListener +from tweepy import API, Stream, OAuthHandler, TweepError + +class TweetStreamListener(StreamListener): + + # tweet id list + tweet_ids = [] + + # file to hold twitter user ids + twitter_users_file = './twitteruserids.txt' + + def __init__(self,symbol): + self.symbol = symbol + + # on success + def on_data(self, data): + + try: + # decode json + dict_data = json.loads(data) + + logger.debug(dict_data) + + # clean up tweet text + #text = unicodedata.normalize( + # 'NFKD', dict_data["text"]).encode('ascii', 'ignore') + text = dict_data["text"] + if text is None: + logger.info("Tweet has no relevant text, skipping") + return True + + # grab html links from tweet + #tweet_urls = re.search("http\S+", text) + + # clean up tweet text more + text = text.replace("\n", " ") + text = re.sub(r"http\S+", "", text) + text = re.sub(r"&.*?;", "", text) + text = re.sub(r"<.*?>", "", text) + text = text.replace("RT", "") + text = text.replace(u"…", "") + text = text.strip() + + # get date when tweet was created + created_date = time.strftime( + '%Y-%m-%dT%H:%M:%S', time.strptime(dict_data['created_at'], '%a %b %d %H:%M:%S +0000 %Y')) + + # store dict_data into vars + screen_name = str(dict_data.get("user", {}).get("screen_name")) + location = str(dict_data.get("user", {}).get("location")) + language = str(dict_data.get("user", {}).get("lang")) + friends = int(dict_data.get("user", {}).get("friends_count")) + followers = int(dict_data.get("user", {}).get("followers_count")) + statuses = int(dict_data.get("user", {}).get("statuses_count")) + text_filtered = str(text) + tweetid = 
int(dict_data.get("id")) + text_raw = str(dict_data.get("text")) + + # output twitter data + print("\n------------------------------") + print("Tweet Date: " + created_date) + print("Screen Name: " + screen_name) + print("Location: " + location) + print("Language: " + language) + print("Friends: " + str(friends)) + print("Followers: " + str(followers)) + print("Statuses: " + str(statuses)) + print("Tweet ID: " + str(tweetid)) + print("Tweet Raw Text: " + text_raw) + print("Tweet Filtered Text: " + text_filtered) + + # create tokens of words in text using nltk + text_for_tokens = re.sub( + r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", text_filtered) + tokens = nltk.word_tokenize(text_for_tokens) + print("NLTK Tokens: " + str(tokens)) + + # do some checks before adding to elasticsearch and crawling urls in tweet + if friends == 0 or \ + followers == 0 or \ + statuses == 0 or \ + text == "" or \ + tweetid in self.tweet_ids: + logger.info("Tweet doesn't meet min requirements, not adding") + return True + + # check ignored tokens from config + for t in nltk_tokens_ignored: + if t in tokens: + logger.info("Tweet contains token from ignore list, not adding") + return True + # check required tokens from config + tokenspass = False + for t in nltk_tokens_required: + if t in tokens: + tokenspass = True + break + if not tokenspass: + logger.info("Tweet does not contain token from required list, not adding") + return True + + # strip out hashtags for language processing + tweet = re.sub(r"[#|@|\$]\S+", "", text) + tweet.strip() + + # get sentiment values + polarity, subjectivity, sentiment = sentiment_analysis(tweet) + + # add tweet_id to list + self.tweet_ids.append(dict_data["id"]) + + # remove hashtags for elasticsearch + #text_filtered = re.sub(r"[#|@|\$]\S+", "", text_filtered) + + logger.info("Adding tweet to elasticsearch") + # add twitter data and sentiment info to elasticsearch + es.index(index=self.symbol, + doc_type="tweet", + body={"author": 
screen_name, + "location": location, + "language": language, + "friends": friends, + "followers": followers, + "statuses": statuses, + "date": created_date, + "message": text_filtered, + "tweet_id": tweetid, + "polarity": polarity, + "subjectivity": subjectivity, + "sentiment": sentiment}) + + return True + + except Exception as e: + logger.warning("Exception: exception caused by: %s" % e) + raise + + # on failure + def on_error(self, status_code): + logger.error("Got an error with status code: %s" % status_code) + return True + + # on timeout + def on_timeout(self): + logger.warning("Timeout...") + return True + + + +def get_twitter_users_from_url(url): + twitter_users = [] + logger.info("Grabbing any twitter users from url %s" % url) + try: + twitter_urls = ("http://twitter.com/", "http://www.twitter.com/", + "https://twitter.com/", "https://www.twitter.com/") + # req_header = {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Safari/604.1.38"} + req = requests.get(url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html_links = [] + for link in soup.findAll('a'): + html_links.append(link.get('href')) + + if html_links: + for link in html_links: + # check if twitter_url in link + parsed_uri = urlparse.urljoin(link, '/') + # get twitter user name from link and add to list + if parsed_uri in twitter_urls and "=" not in link and "?" 
not in link: + user = link.split('/')[3] + twitter_users.append(u'@' + user) + logger.debug(twitter_users) + except requests.exceptions.RequestException as re: + logger.warning("Requests exception: can't crawl web site caused by: %s" % re) + pass + return twitter_users + + +def get_twitter_users_from_file(file): + # get twitter user ids from text file + twitter_users = [] + logger.info("Grabbing any twitter user ids from file %s" % file) + try: + f = open(file, "rt", encoding='utf-8') + for line in f.readlines(): + u = line.strip() + twitter_users.append(u) + logger.debug(twitter_users) + f.close() + except (IOError, OSError) as e: + logger.warning("Exception: error opening file caused by: %s" % e) + pass + return twitter_users \ No newline at end of file diff --git a/src/config.sample.py b/src/config.sample.py index 6da12d2..9a76417 100644 --- a/src/config.sample.py +++ b/src/config.sample.py @@ -4,17 +4,26 @@ elasticsearch_user = "" elasticsearch_password = "" +redis_host = "redis" +redis_port = 6379 + +symbols = ['tsla','amd']; +follow_link = False; + + #Sentiment Analyizers config -consumer_key = "" -consumer_secret = "" -access_token = "" -access_token_secret = "" nltk_tokens_required = { 'default': ("increase","decrease","buying","sold","buy","selling","winning","losing"), 'tsla': ("tesla", "@tesla", "#tesla", "tsla", "#tsla", "elonmusk", "elon", "musk"), 'amd': ('amd','ryzen','epyc','radeon','crossfire','threadripper') } -nltk_tokens_ignored = ("win", "Win", "giveaway", "Giveaway") +nltk_tokens_ignored = ("win", "giveaway") + +#Twitter Settings +consumer_key = "" +consumer_secret = "" +access_token = "" +access_token_secret = "" twitter_feeds = ["@elonmusk", "@cnbc", "@benzinga", "@stockwits", "@Newsweek", "@WashingtonPost", "@breakoutstocks", "@bespokeinvest", "@WSJMarkets", "@stephanie_link", "@nytimesbusiness", "@IBDinvestors", @@ -23,10 +32,8 @@ "@muddywatersre", "@mcuban", "@AswathDamodaran", "@elerianm", "@MorganStanley", "@ianbremmer", 
"@GoldmanSachs", "@Wu_Tang_Finance", "@Schuldensuehner", "@NorthmanTrader", "@Frances_Coppola", "@BuzzFeed","@nytimes"] -sentiment_frequency = 3600 #Stock Price fetcher config -price_frequency = 900 weekday_start = 1 weekday_end = 5 hour_start = 9 diff --git a/src/delindex.py b/src/delindex.py index 03ac084..12ae7d2 100644 --- a/src/delindex.py +++ b/src/delindex.py @@ -1,3 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import argparse from StockSight.Initializer.ElasticSearch import es diff --git a/src/import.kibana.py b/src/import.kibana.py index e6c3ffa..6150bec 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -1,3 +1,14 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import requests import sys import os.path diff --git a/src/news.sentiment.py b/src/news.sentiment.py index 678c3a6..c446ac4 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -1,11 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""sentiment.py - analyze tweets news sites and their sentiment values to +"""stockprice.py - get stock price from Yahoo and add to Elasticsearch. -See README.md or https://github.com/heyqule/stocksight +See README.md or https://github.com/shirosaidev/stocksight for more information. 
-Copyright (C) Allen Xie 2019 +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. """ diff --git a/src/sentiment.py b/src/sentiment.og.py similarity index 94% rename from src/sentiment.py rename to src/sentiment.og.py index 482e959..c6d6c9a 100644 --- a/src/sentiment.py +++ b/src/sentiment.og.py @@ -11,16 +11,15 @@ LICENSE for the full license text. """ -import argparse -import json -import logging -import re import sys +import json import time - -import nltk +import re +import unicodedata import requests - +import nltk +import argparse +import logging try: import urllib.parse as urlparse except ImportError: @@ -200,12 +199,11 @@ def on_timeout(self): class NewsHeadlineListener: - def __init__(self, url=None, frequency=sentiment_frequency): + def __init__(self, url=None, frequency=120): self.url = url self.headlines = [] self.followedlinks = [] self.frequency = frequency - self.max_cache = 1000; while True: new_headlines = self.get_news_headlines(self.url) @@ -225,7 +223,7 @@ def __init__(self, url=None, frequency=sentiment_frequency): # create tokens of words in text using nltk text_for_tokens = re.sub( r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", htext) - tokens = nltk.word_tokenize(text_for_tokens.lower()) + tokens = nltk.word_tokenize(text_for_tokens) print("NLTK Tokens: " + str(tokens)) # check ignored tokens from config @@ -235,14 +233,7 @@ def __init__(self, url=None, frequency=sentiment_frequency): continue # check required tokens from config tokenspass = False - - - if args.index in nltk_tokens_required: - nltk_tokens = nltk_tokens_required[args.index] - else: - nltk_tokens = nltk_tokens_required['default'] - - for t in nltk_tokens: + for t in nltk_tokens_required: if t in tokens: tokenspass = True break @@ -264,27 +255,9 @@ def __init__(self, url=None, frequency=sentiment_frequency): "subjectivity": subjectivity, "sentiment": 
sentiment}) - new_headlines = None; - self.cleanup() - logger.info("Will get news headlines again in %s sec..." % self.frequency) time.sleep(self.frequency) - def cleanup(self): - new_headline = [] - new_followlink = [] - - if len(self.headlines) > self.max_cache: - for i in range(self.max_cache / 2, len(self.headlines) - 1): - new_headline.append(self.headlines[i]) - self.headlines = new_headline - - if len(self.followedlinks) > self.max_cache: - for i in range(self.max_cache / 2, len(self.followedlinks) - 1): - new_followlink.append(self.followedlinks[i]) - self.followedlinks = new_followlink - - def get_news_headlines(self, url): latestheadlines = [] @@ -304,7 +277,7 @@ def get_news_headlines(self, url): if html: for i in html: - latestheadlines.append((unicode(i.next.next.next.next), url)) + latestheadlines.append((i.next.next.next.next, url)) logger.debug(latestheadlines) if args.followlinks: @@ -321,7 +294,7 @@ def get_news_headlines(self, url): for linkurl in latestheadlines_links: for p in get_page_text(linkurl): - latestheadlines.append((unicode(p), linkurl)) + latestheadlines.append((p, linkurl)) logger.debug(latestheadlines) except requests.exceptions.RequestException as re: @@ -464,7 +437,6 @@ def get_twitter_users_from_url(url): html_links = [] for link in soup.findAll('a'): html_links.append(link.get('href')) - if html_links: for link in html_links: # check if twitter_url in link @@ -514,8 +486,8 @@ def get_twitter_users_from_file(file): help="Use twitter user ids from file") parser.add_argument("-n", "--newsheadlines", metavar="SYMBOL", help="Get news headlines instead of Twitter using stock symbol, example: TSLA") - parser.add_argument("--frequency", metavar="FREQUENCY", default=sentiment_frequency, type=int, - help="How often in seconds to retrieve news headlines (default: %d sec)" % sentiment_frequency) + parser.add_argument("--frequency", metavar="FREQUENCY", default=120, type=int, + help="How often in seconds to retrieve news headlines (default: 
120 sec)") parser.add_argument("--followlinks", action="store_true", help="Follow links on news headlines and scrape relevant text from landing page") parser.add_argument("-v", "--verbose", action="store_true", @@ -582,17 +554,17 @@ def get_twitter_users_from_file(file): color = '35m' banner = """\033[%s - - /$$ /$$ /$$ /$$ /$$ - | $$ | $$ |__/ | $$ | $$ - /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ - /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ -| $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ + + /$$ /$$ /$$ /$$ /$$ + | $$ | $$ |__/ | $$ | $$ + /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ + /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ +| $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ \____ $$ | $$ /$$| $$ | $$| $$ | $$_ $$ \____ $$| $$| $$ | $$| $$ | $$ | $$ /$$ /$$$$$$$/ | $$$$/| $$$$$$/| $$$$$$$| $$ \ $$ /$$$$$$$/| $$| $$$$$$$| $$ | $$ | $$$$/ -|_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ - /$$ \ $$ - :) = +$ :( = -$ | $$$$$$/ +|_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ + /$$ \ $$ + :) = +$ :( = -$ | $$$$$$/ \______/ v%s \033[0m""" % (color, STOCKSIGHT_VERSION) print(banner + '\n') @@ -813,24 +785,18 @@ def get_twitter_users_from_file(file): logger.info('Twitter keywords: ' + str(args.keywords)) logger.info('Listening for Tweets (ctrl-c to exit)...') if args.keywords is None: - stream.filter(follow= str(useridlist), languages=['en']) + stream.filter(follow=useridlist, languages=['en']) else: # keywords to search on twitter # add keywords to list keywords = args.keywords.split(',') - - if args.index in nltk_tokens_required: - nltk_tokens = nltk_tokens_required[args.index] - else: - nltk_tokens = nltk_tokens_required['default'] - # add tokens to keywords to list - for f in nltk_tokens: - 
keywords.append(f.lower()) + for f in nltk_tokens_required: + keywords.append(f) stream.filter(track=keywords, languages=['en']) except TweepError as te: logger.debug("Tweepy Exception: Failed to get tweets caused by: %s" % te) except KeyboardInterrupt: print("Ctrl-c keyboard interrupt, exiting...") stream.disconnect() - sys.exit(0) + sys.exit(0) \ No newline at end of file diff --git a/src/startup.sample.sh b/src/startup.sample.sh deleted file mode 100755 index 4adc01b..0000000 --- a/src/startup.sample.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -sleep 30 -python sentiment.py -n TSLA -i tsla & -sleep 1 -python stockprice.py -s TSLA -i tsla & -sleep 1 -python sentiment.py -n AMD -i amd & -sleep 1 -python stockprice.py -s AMD -i amd & - - -while true -do - sleep 3600 -done diff --git a/src/startup.sh b/src/startup.sh index 4c56b59..25527c1 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,7 +1,11 @@ #!/bin/bash +#Copyright (C) Allen (Jian Feng) Xie 2019 +#stocksight is released under the Apache 2.0 license. See +#LICENSE for the full license text. -sleep 30; +echo "Waiting for other dependent instances to spawn... 
(60 seconds)" +sleep 60; echo "Copy kibana dashboard if they don't exist"; python import.kibana.py @@ -9,6 +13,10 @@ python import.kibana.py tick_time=900 tick=0 let sentiment_time=900*4 + +#echo "Spawning Tweet Sentiment receiver instance"; +#python tweet.sentiment.py & + while true do echo "Spawning stock price receiver instance"; @@ -18,7 +26,7 @@ do if [ $tick_mod -eq 0 ] then - echo "Spawning news sentiment receiver instance"; + echo "Spawning News Headline Sentiment receiver instance"; python news.sentiment.py & echo "Will get sentiment data again in ${sentiment_time} sec..."; let tick=0; diff --git a/src/stockprice.og.py b/src/stockprice.og.py new file mode 100644 index 0000000..9bb940e --- /dev/null +++ b/src/stockprice.og.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. 
+""" + +import time +import requests +import re +import argparse +import logging +import sys +try: + from elasticsearch5 import Elasticsearch +except ImportError: + from elasticsearch import Elasticsearch +from random import randint + +# import elasticsearch host +from config import elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password + + +STOCKSIGHT_VERSION = '0.1-b.5' +__version__ = STOCKSIGHT_VERSION + +# url to fetch stock price from, SYMBOL will be replaced with symbol from cli args +url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" + +# create instance of elasticsearch +es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], + http_auth=(elasticsearch_user, elasticsearch_password)) + +class GetStock: + + def get_price(self, url, symbol): + import re + + while True: + + logger.info("Grabbing stock data for symbol %s..." 
% symbol) + + try: + + # add stock symbol to url + url = re.sub("SYMBOL", symbol, url) + # get stock data (json) from url + try: + r = requests.get(url) + data = r.json() + except (requests.HTTPError, requests.ConnectionError, requests.ConnectTimeout) as re: + logger.error("Exception: exception getting stock data from url caused by %s" % re) + raise + logger.debug(data) + # build dict to store stock info + try: + D = {} + D['symbol'] = symbol + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] + if D['last'] is None: + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] + D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) + try: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 + except TypeError: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 + pass + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] + if D['high'] is None: + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-2] + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-1] + if D['low'] is None: + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-2] + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] + if D['vol'] is None: + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] + logger.debug(D) + except KeyError as e: + logger.error("Exception: exception getting stock data caused by %s" % e) + raise + + # check before adding to ES + if D['last'] is not None and D['high'] is not None and D['low'] is not None: + 
logger.info("Adding stock data to Elasticsearch...") + # add stock price info to elasticsearch + es.index(index=args.index, + doc_type="stock", + body={"symbol": D['symbol'], + "price_last": D['last'], + "date": D['date'], + "change": D['change'], + "price_high": D['high'], + "price_low": D['low'], + "vol": D['vol'] + }) + else: + logger.warning("Some stock data had null values, not adding to Elasticsearch") + + except Exception as e: + logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) + pass + + logger.info("Will get stock data again in %s sec..." % args.frequency) + time.sleep(args.frequency) + + +if __name__ == '__main__': + + # parse cli args + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--index", metavar="INDEX", default="stocksight", + help="Index name for Elasticsearch (default: stocksight)") + parser.add_argument("-d", "--delindex", action="store_true", + help="Delete existing Elasticsearch index first") + parser.add_argument("-s", "--symbol", metavar="SYMBOL", + help="Stock symbol to use, example: TSLA") + parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=120, type=int, + help="How often in seconds to retrieve stock data (default: 120 sec)") + parser.add_argument("-v", "--verbose", action="store_true", + help="Increase output verbosity") + parser.add_argument("--debug", action="store_true", + help="Debug message output") + parser.add_argument("-q", "--quiet", action="store_true", + help="Run quiet with no message output") + parser.add_argument("-V", "--version", action="version", + version="stocksight v%s" % STOCKSIGHT_VERSION, + help="Prints version and exits") + args = parser.parse_args() + + # set up logging + logger = logging.getLogger('stocksight') + logger.setLevel(logging.INFO) + eslogger = logging.getLogger('elasticsearch') + eslogger.setLevel(logging.WARNING) + requestslogger = logging.getLogger('requests') + requestslogger.setLevel(logging.WARNING) + logging.addLevelName( 
+ logging.INFO, "\033[1;32m%s\033[1;0m" + % logging.getLevelName(logging.INFO)) + logging.addLevelName( + logging.WARNING, "\033[1;31m%s\033[1;0m" + % logging.getLevelName(logging.WARNING)) + logging.addLevelName( + logging.ERROR, "\033[1;41m%s\033[1;0m" + % logging.getLevelName(logging.ERROR)) + logging.addLevelName( + logging.DEBUG, "\033[1;33m%s\033[1;0m" + % logging.getLevelName(logging.DEBUG)) + logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' + loglevel = logging.INFO + logging.basicConfig(format=logformatter, level=loglevel) + if args.verbose: + logger.setLevel(logging.INFO) + eslogger.setLevel(logging.INFO) + requestslogger.setLevel(logging.INFO) + if args.debug: + logger.setLevel(logging.DEBUG) + eslogger.setLevel(logging.DEBUG) + requestslogger.setLevel(logging.DEBUG) + if args.quiet: + logger.disabled = True + eslogger.disabled = True + requestslogger.disabled = True + + # print banner + if not args.quiet: + c = randint(1, 4) + if c == 1: + color = '31m' + elif c == 2: + color = '32m' + elif c == 3: + color = '33m' + elif c == 4: + color = '35m' + + banner = """\033[%s + + /$$ /$$ /$$ /$$ /$$ + | $$ | $$ |__/ | $$ | $$ + /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ + /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ + | $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ + \____ $$ | $$ /$$| $$ | $$| $$ | $$_ $$ \____ $$| $$| $$ | $$| $$ | $$ | $$ /$$ + /$$$$$$$/ | $$$$/| $$$$$$/| $$$$$$$| $$ \ $$ /$$$$$$$/| $$| $$$$$$$| $$ | $$ | $$$$/ + |_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ + /$$ \ $$ + :) = +$ :( = -$ | $$$$$$/ + \______/ v%s + \033[0m""" % (color, STOCKSIGHT_VERSION) + print(banner + '\n') + + # set up elasticsearch mappings and create index + mappings = { + "mappings": { + "stock": { + "properties": { + "symbol": { + "type": "keyword" + }, + "price_last": { + "type": "float" + }, + "date": { + 
"type": "date" + }, + "change": { + "type": "float" + }, + "price_high": { + "type": "float" + }, + "price_low": { + "type": "float" + }, + "vol": { + "type": "integer" + } + } + } + } + } + + if args.symbol is None: + print("No stock symbol, see -h for help.") + sys.exit(1) + + if args.delindex: + logger.info('Deleting existing Elasticsearch index ' + args.index) + es.indices.delete(index=args.index, ignore=[400, 404]) + + logger.info('Creating new Elasticsearch index or using existing ' + args.index) + es.indices.create(index=args.index, body=mappings, ignore=[400, 404]) + + # create instance of GetStock + stockprice = GetStock() + + try: + # get stock price + stockprice.get_price(symbol=args.symbol, url=url) + except Exception as e: + logger.warning("Exception: Failed to get stock data caused by: %s" % e) + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + sys.exit(0) \ No newline at end of file diff --git a/src/stockprice.py b/src/stockprice.py index 4d6c68e..cff7ab4 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -6,10 +6,10 @@ for more information. Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. """ - import argparse import logging import sys diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py new file mode 100644 index 0000000..46d6704 --- /dev/null +++ b/src/tweet.sentiment.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. 
+""" + +import argparse +import sys +from random import randint + +from StockSight.TweetListener import * + + +STOCKSIGHT_VERSION = '0.1-b.6' +__version__ = STOCKSIGHT_VERSION + + +if __name__ == '__main__': + # parse cli args + parser = argparse.ArgumentParser() + parser.add_argument("-v", "--verbose", action="store_true", + help="Increase output verbosity") + parser.add_argument("--debug", action="store_true", + help="Debug message output") + parser.add_argument("-q", "--quiet", action="store_true", + help="Run quiet with no message output") + parser.add_argument("-V", "--version", action="version", + version="stocksight v%s" % STOCKSIGHT_VERSION, + help="Prints version and exits") + args = parser.parse_args() + + if args.verbose: + logger.setLevel(logging.INFO) + eslogger.setLevel(logging.INFO) + requestslogger.setLevel(logging.INFO) + if args.debug: + logger.setLevel(logging.DEBUG) + eslogger.setLevel(logging.DEBUG) + requestslogger.setLevel(logging.DEBUG) + if args.quiet: + logger.disabled = True + eslogger.disabled = True + requestslogger.disabled = True + + + # set up elasticsearch mappings and create index + mappings = { + "mappings": { + "tweet": { + "properties": { + "author": { + "type": "string", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "location": { + "type": "string", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "language": { + "type": "string", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "friends": { + "type": "long" + }, + "followers": { + "type": "long" + }, + "statuses": { + "type": "long" + }, + "date": { + "type": "date" + }, + "message": { + "type": "string", + "fields": { + "english": { + "type": "string", + "analyzer": "english" + }, + "keyword": { + "type": "keyword" + } + } + }, + "tweet_id": { + "type": "long" + }, + "polarity": { + "type": "float" + }, + "subjectivity": { + "type": "float" + }, + "sentiment": { + "type": "string", + "fields": { + "keyword": { + "type": 
"keyword" + } + } + } + } + } + } + } + + #TODO exit if the twitter keys are empty + if not consumer_key or \ + not consumer_secret or \ + not access_token or \ + not access_token_secret: + logger.warning("Invalid Twitter API cred") + sys.exit(1) + + + try: + for symbol in symbols: + try: + logger.info('Creating new Elasticsearch index or using existing ' + symbol) + es.indices.create(index=symbol, body=mappings, ignore=[400, 404]) + + logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) + logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) + logger.info("Scraping tweets for %s from %s ..." % (symbol)) + + # create instance of TweetStreamListener + TweetStreamListener = TweetStreamListener(symbol) + + # set twitter keys/tokens + auth = OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + api = API(auth) + + # create instance of the tweepy stream + stream = Stream(auth, TweetStreamListener) + + logger.info("Looking up Twitter user ids from usernames...") + useridlist = [] + while True: + for u in twitter_feeds: + try: + # get user id from screen name using twitter api + user = api.get_user(screen_name=u) + uid = int(user.id) + if uid not in useridlist: + useridlist.append(uid) + time.sleep(randint(0, 2)) + except TweepError as te: + # sleep a bit in case twitter suspends us + logger.warning("Tweepy exception: twitter api error caused by: %s" % te) + logger.info("Sleeping for a random amount of time and retrying...") + time.sleep(randint(1,10)) + continue + except KeyboardInterrupt: + logger.info("Ctrl-c keyboard interrupt, exiting...") + stream.disconnect() + sys.exit(0) + break + + if len(useridlist) > 0: + logger.info('Writing twitter user ids to text file %s' % TweetStreamListener.twitter_users_file) + try: + f = open(TweetStreamListener.twitter_users_file, "wt", encoding='utf-8') + for i in useridlist: + line = str(i) + "\n" + if type(line) is bytes: + line = line.decode('utf-8') + 
f.write(line) + f.close() + except (IOError, OSError) as e: + logger.warning("Exception: error writing to file caused by: %s" % e) + pass + except Exception as e: + raise + + try: + # search twitter for keywords + logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) + logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) + logger.info('Twitter Feeds: ' + str(twitter_feeds)) + logger.info('Twitter User Ids: ' + str(useridlist)) + logger.info('Twitter keywords: ' + str(args.keywords)) + logger.info('Listening for Tweets (ctrl-c to exit)...') + if args.keywords is None: + stream.filter(follow= str(useridlist), languages=['en']) + else: + # keywords to search on twitter + # add keywords to list + keywords = args.keywords.split(',') + + if args.index in nltk_tokens_required: + nltk_tokens = nltk_tokens_required[args.index] + else: + nltk_tokens = nltk_tokens_required['default'] + + # add tokens to keywords to list + for f in nltk_tokens: + keywords.append(f.lower()) + stream.filter(track=keywords, languages=['en']) + except TweepError as te: + logger.debug("Tweepy Exception: Failed to get tweets caused by: %s" % te) + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + stream.disconnect() + sys.exit(0) + + except Exception as e: + logger.warning("%s" % e) + pass + + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + sys.exit(0) \ No newline at end of file From fd9fe56ab53681552c76a6070cc1f6ee7f189622 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 2 Sep 2019 02:07:48 -0400 Subject: [PATCH 23/55] Change to wt --- src/import.kibana.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/import.kibana.py b/src/import.kibana.py index 6150bec..89b06c3 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -24,11 +24,10 @@ for symbol in symbols: try: ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' - if os.path.exists(ndjson_file_path) is False: - 
ndjson_file = open(ndjson_file_path, "xt", encoding='utf-8') - final_text = import_template.replace('tmpl',symbol) - ndjson_file.write(final_text) - ndjson_file.close() + ndjson_file = open(ndjson_file_path, "wt", encoding='utf-8') + final_text = import_template.replace('tmpl',symbol) + ndjson_file.write(final_text) + ndjson_file.close() kibana_import_url = 'http://kibana:5601/api/saved_objects/_import' payload = { 'overwrite': 'false'} From 3a3b4525c6fa21bfbe7d49ba644e6e58e349b00b Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 2 Sep 2019 14:13:48 -0400 Subject: [PATCH 24/55] Change Mapping to 7.3 format Twitter Stream sentiment is now supported --- src/StockSight/EsMap/Sentiment.py | 32 ++++ src/StockSight/EsMap/StockPrice.py | 28 +++ src/StockSight/NewsHeadlineListener.py | 5 +- src/StockSight/StockPriceListener.py | 4 +- src/StockSight/TweetListener.py | 47 ++--- src/import.kibana.py | 2 +- src/kibana_export/export.7.3.ndjson | 2 +- src/news.sentiment.py | 54 +----- src/{sentiment.og.py => sentiment.py} | 0 src/startup.sh | 10 +- src/stockprice.docker.py | 98 ++++++++++ src/stockprice.og.py | 254 ------------------------- src/stockprice.py | 165 +++++++++++++--- src/tweet.sentiment.py | 242 ++++++++--------------- 14 files changed, 417 insertions(+), 526 deletions(-) create mode 100644 src/StockSight/EsMap/Sentiment.py create mode 100644 src/StockSight/EsMap/StockPrice.py rename src/{sentiment.og.py => sentiment.py} (100%) create mode 100644 src/stockprice.docker.py delete mode 100644 src/stockprice.og.py diff --git a/src/StockSight/EsMap/Sentiment.py b/src/StockSight/EsMap/Sentiment.py new file mode 100644 index 0000000..b7f1008 --- /dev/null +++ b/src/StockSight/EsMap/Sentiment.py @@ -0,0 +1,32 @@ +# set up elasticsearch mappings and create index +mapping = { + "mappings": { + "properties": { + "author": { + "type": "keyword", + }, + "location": { + "type": "keyword", + }, + "date": { + "type": "date" + }, + "message": { + "type": "keyword", + }, + 
"msg_id": { + "type": "text" + }, + "polarity": { + "type": "float" + }, + "subjectivity": { + "type": "float" + }, + "sentiment": { + "type": "keyword", + } + } + } +} + diff --git a/src/StockSight/EsMap/StockPrice.py b/src/StockSight/EsMap/StockPrice.py new file mode 100644 index 0000000..d5a87fb --- /dev/null +++ b/src/StockSight/EsMap/StockPrice.py @@ -0,0 +1,28 @@ +# set up elasticsearch mappings and create index +mapping = { + "mappings": { + "properties": { + "symbol": { + "type": "keyword" + }, + "price_last": { + "type": "float" + }, + "date": { + "type": "date" + }, + "change": { + "type": "float" + }, + "price_high": { + "type": "float" + }, + "price_low": { + "type": "float" + }, + "vol": { + "type": "integer" + } + } + } +} \ No newline at end of file diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index d534b7e..696179a 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -87,12 +87,13 @@ def __init__(self, symbol,url=None): logger.info("Adding news headline to elasticsearch") # add news headline data and sentiment info to elasticsearch - es.index(index='stocksight_'+self.symbol, - doc_type="newsheadline", + es.index(index="stocksight_"+self.symbol+"_sentiment", + doc_type="_doc", body={"date": datenow, "location": htext_url, "message": htext, "polarity": polarity, + "msg_id": md5_hash, "subjectivity": subjectivity, "sentiment": sentiment}) rds.set(md5_hash,1,self.cache_length) diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index 9eca371..c4fb7fe 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -84,8 +84,8 @@ def get_price(self, symbol): if D['last'] is not None and D['high'] is not None and D['low'] is not None: logger.info("Adding stock data to Elasticsearch...") # add stock price info to elasticsearch - es.index(index=symbol, - doc_type="stock", + 
es.index(index="stocksight_"+symbol+"_price", + doc_type="_doc", body={"symbol": D['symbol'], "price_last": D['last'], "date": D['date'], diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index 30db6db..4f08cd5 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -19,22 +19,17 @@ from config import * from StockSight.Initializer.ElasticSearch import es +from StockSight.Initializer.Redis import rds from StockSight.Helper.Sentiment import * from tweepy.streaming import StreamListener -from tweepy import API, Stream, OAuthHandler, TweepError -class TweetStreamListener(StreamListener): - # tweet id list - tweet_ids = [] +class TweetStreamListener(StreamListener): # file to hold twitter user ids twitter_users_file = './twitteruserids.txt' - def __init__(self,symbol): - self.symbol = symbol - # on success def on_data(self, data): @@ -100,13 +95,17 @@ def on_data(self, data): # do some checks before adding to elasticsearch and crawling urls in tweet if friends == 0 or \ - followers == 0 or \ + followers < min_followers or \ statuses == 0 or \ - text == "" or \ - tweetid in self.tweet_ids: + text == "": logger.info("Tweet doesn't meet min requirements, not adding") return True + redis_id = 'tweet'+str(tweetid); + if rds.exists(redis_id): + logger.info("Tweet already exists") + return True + # check ignored tokens from config for t in nltk_tokens_ignored: if t in tokens: @@ -114,10 +113,16 @@ def on_data(self, data): return True # check required tokens from config tokenspass = False - for t in nltk_tokens_required: - if t in tokens: - tokenspass = True + for key, nltk_tokens_required_sublist in nltk_tokens_required.items(): + if(key == 'default'): continue + self.symbol = key + for t in nltk_tokens_required_sublist: + if t in tokens: + tokenspass = True + break + if tokenspass: break + if not tokenspass: logger.info("Tweet does not contain token from required list, not adding") return True @@ -129,29 +134,25 @@ 
def on_data(self, data): # get sentiment values polarity, subjectivity, sentiment = sentiment_analysis(tweet) - # add tweet_id to list - self.tweet_ids.append(dict_data["id"]) - # remove hashtags for elasticsearch #text_filtered = re.sub(r"[#|@|\$]\S+", "", text_filtered) logger.info("Adding tweet to elasticsearch") # add twitter data and sentiment info to elasticsearch - es.index(index=self.symbol, - doc_type="tweet", + es.index(index="stocksight_"+self.symbol+"_sentiment", + doc_type="_doc", body={"author": screen_name, "location": location, - "language": language, - "friends": friends, - "followers": followers, - "statuses": statuses, "date": created_date, "message": text_filtered, - "tweet_id": tweetid, + "msg_id": redis_id, "polarity": polarity, "subjectivity": subjectivity, "sentiment": sentiment}) + # add tweet_id to cache + rds.set(redis_id,1,86400) + return True except Exception as e: diff --git a/src/import.kibana.py b/src/import.kibana.py index 89b06c3..2026800 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to +"""import.kibana.py - import kabana visual for each defined symbol Elasticsearch. See README.md or https://github.com/shirosaidev/stocksight for more information. 
diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index fe85513..94aea5e 100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,4 +1,4 @@ -{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"
},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_tmpl"},"id":"tmpl_index_pattern","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-08-31T04:11:15.828Z","version":"WzcsMV0="} +{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"st
ring\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_tmpl_*"},"id":"tmpl_index_pattern","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-08-31T04:11:15.828Z","version":"WzcsMV0="} 
{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_polarity","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":32}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50],\"customLabel\":\"\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}]}"},"id":"tmpl_polarity_visual","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization",
"updated_at":"2019-08-31T04:17:03.654Z","version":"WzgsMV0="} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_sentinel","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"isDonut\":false,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl_polarity_sentinel","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:19:36.071Z","version":"WzksMV0="} 
{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_articles","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_articles\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}"},"id":"tmpl_articles","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:20:24.604Z","version":"WzEwLDFd"} diff --git a/src/news.sentiment.py b/src/news.sentiment.py index c446ac4..7243138 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -15,6 +15,7 @@ import sys from StockSight.NewsHeadlineListener import * +from StockSight.EsMap.Sentiment import * STOCKSIGHT_VERSION = '0.1-b.6' @@ -48,62 +49,11 @@ eslogger.disabled = True requestslogger.disabled = True - - # set up elasticsearch mappings and 
create index - mappings = { - "mappings": { - "newsheadline": { - "properties": { - "msg_id": { - "type": "string" - }, - "date": { - "type": "date" - }, - "location": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "message": { - "type": "string", - "fields": { - "english": { - "type": "string", - "analyzer": "english" - }, - "keyword": { - "type": "keyword" - } - } - }, - "polarity": { - "type": "float" - }, - "subjectivity": { - "type": "float" - }, - "sentiment": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - } - } - } - } - } - try: for symbol in symbols: try: logger.info('Creating new Elasticsearch index or using existing ' + symbol) - es.indices.create(index=symbol, body=mappings, ignore=[400, 404]) + es.indices.create(index="stocksight_"+symbol+"_sentiment", body=mapping, ignore=[400, 404]) url = "https://finance.yahoo.com/quote/%s/?p=%s" % (symbol, symbol) logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) diff --git a/src/sentiment.og.py b/src/sentiment.py similarity index 100% rename from src/sentiment.og.py rename to src/sentiment.py diff --git a/src/startup.sh b/src/startup.sh index 25527c1..08f0459 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -4,8 +4,8 @@ #stocksight is released under the Apache 2.0 license. See #LICENSE for the full license text. -echo "Waiting for other dependent instances to spawn... (60 seconds)" -sleep 60; +echo "Waiting for other dependent instances to spawn... 
(30 seconds)" +sleep 30; echo "Copy kibana dashboard if they don't exist"; python import.kibana.py @@ -14,13 +14,13 @@ tick_time=900 tick=0 let sentiment_time=900*4 -#echo "Spawning Tweet Sentiment receiver instance"; -#python tweet.sentiment.py & +echo "Spawning Tweet Sentiment receiver instance"; +python tweet.sentiment.py & while true do echo "Spawning stock price receiver instance"; - python stockprice.py & + python stockprice.docker.py & echo "Will get stock data again in ${tick_time} sec..."; let tick_mod=tick%4 diff --git a/src/stockprice.docker.py b/src/stockprice.docker.py new file mode 100644 index 0000000..7ba3d15 --- /dev/null +++ b/src/stockprice.docker.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.docker.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. 
+""" +import argparse +import logging +import sys + +from config import symbols +from StockSight.Initializer.ElasticSearch import es +from StockSight.EsMap.StockPrice import mapping +from StockSight.StockPriceListener import StockPriceListener + + +STOCKSIGHT_VERSION = '0.1-b.5' +__version__ = STOCKSIGHT_VERSION + + + + +if __name__ == '__main__': + + # parse cli args + parser = argparse.ArgumentParser() + + parser.add_argument("-v", "--verbose", action="store_true", + help="Increase output verbosity") + parser.add_argument("--debug", action="store_true", + help="Debug message output") + parser.add_argument("-q", "--quiet", action="store_true", + help="Run quiet with no message output") + parser.add_argument("-V", "--version", action="version", + version="stocksight v%s" % STOCKSIGHT_VERSION, + help="Prints version and exits") + args = parser.parse_args() + + # set up logging + logger = logging.getLogger('stocksight') + logger.setLevel(logging.INFO) + eslogger = logging.getLogger('elasticsearch') + eslogger.setLevel(logging.WARNING) + requestslogger = logging.getLogger('requests') + requestslogger.setLevel(logging.WARNING) + logging.addLevelName( + logging.INFO, "\033[1;32m%s\033[1;0m" + % logging.getLevelName(logging.INFO)) + logging.addLevelName( + logging.WARNING, "\033[1;31m%s\033[1;0m" + % logging.getLevelName(logging.WARNING)) + logging.addLevelName( + logging.ERROR, "\033[1;41m%s\033[1;0m" + % logging.getLevelName(logging.ERROR)) + logging.addLevelName( + logging.DEBUG, "\033[1;33m%s\033[1;0m" + % logging.getLevelName(logging.DEBUG)) + logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' + loglevel = logging.INFO + logging.basicConfig(format=logformatter, level=loglevel) + if args.verbose: + logger.setLevel(logging.INFO) + eslogger.setLevel(logging.INFO) + requestslogger.setLevel(logging.INFO) + if args.debug: + logger.setLevel(logging.DEBUG) + eslogger.setLevel(logging.DEBUG) + requestslogger.setLevel(logging.DEBUG) + if args.quiet: + 
logger.disabled = True + eslogger.disabled = True + requestslogger.disabled = True + + try: + for symbol in symbols: + try: + logger.info('Creating new Elasticsearch index or using existing ' + symbol) + es.indices.create(index="stocksight_"+symbol+"_price", body=mapping, ignore=[400, 404]) + + stockprice = StockPriceListener() + + stockprice.get_price(symbol=symbol) + except Exception as e: + logger.warning("%s" % e) + pass + # get stock price + + except Exception as e: + logger.warning("Exception: Failed to get stock data caused by: %s" % e) + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + sys.exit(0) diff --git a/src/stockprice.og.py b/src/stockprice.og.py deleted file mode 100644 index 9bb940e..0000000 --- a/src/stockprice.og.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to -Elasticsearch. -See README.md or https://github.com/shirosaidev/stocksight -for more information. - -Copyright (C) Chris Park 2018 -stocksight is released under the Apache 2.0 license. See -LICENSE for the full license text. 
-""" - -import time -import requests -import re -import argparse -import logging -import sys -try: - from elasticsearch5 import Elasticsearch -except ImportError: - from elasticsearch import Elasticsearch -from random import randint - -# import elasticsearch host -from config import elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password - - -STOCKSIGHT_VERSION = '0.1-b.5' -__version__ = STOCKSIGHT_VERSION - -# url to fetch stock price from, SYMBOL will be replaced with symbol from cli args -url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" - -# create instance of elasticsearch -es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], - http_auth=(elasticsearch_user, elasticsearch_password)) - -class GetStock: - - def get_price(self, url, symbol): - import re - - while True: - - logger.info("Grabbing stock data for symbol %s..." 
% symbol) - - try: - - # add stock symbol to url - url = re.sub("SYMBOL", symbol, url) - # get stock data (json) from url - try: - r = requests.get(url) - data = r.json() - except (requests.HTTPError, requests.ConnectionError, requests.ConnectTimeout) as re: - logger.error("Exception: exception getting stock data from url caused by %s" % re) - raise - logger.debug(data) - # build dict to store stock info - try: - D = {} - D['symbol'] = symbol - D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - if D['last'] is None: - D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) - try: - D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - - data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ - data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 - except TypeError: - D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - - data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ - data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 - pass - D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] - if D['high'] is None: - D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-2] - D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-1] - if D['low'] is None: - D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-2] - D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] - if D['vol'] is None: - D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] - logger.debug(D) - except KeyError as e: - logger.error("Exception: exception getting stock data caused by %s" % e) - raise - - # check before adding to ES - if D['last'] is not None and D['high'] is not None and D['low'] is not None: - 
logger.info("Adding stock data to Elasticsearch...") - # add stock price info to elasticsearch - es.index(index=args.index, - doc_type="stock", - body={"symbol": D['symbol'], - "price_last": D['last'], - "date": D['date'], - "change": D['change'], - "price_high": D['high'], - "price_low": D['low'], - "vol": D['vol'] - }) - else: - logger.warning("Some stock data had null values, not adding to Elasticsearch") - - except Exception as e: - logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) - pass - - logger.info("Will get stock data again in %s sec..." % args.frequency) - time.sleep(args.frequency) - - -if __name__ == '__main__': - - # parse cli args - parser = argparse.ArgumentParser() - parser.add_argument("-i", "--index", metavar="INDEX", default="stocksight", - help="Index name for Elasticsearch (default: stocksight)") - parser.add_argument("-d", "--delindex", action="store_true", - help="Delete existing Elasticsearch index first") - parser.add_argument("-s", "--symbol", metavar="SYMBOL", - help="Stock symbol to use, example: TSLA") - parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=120, type=int, - help="How often in seconds to retrieve stock data (default: 120 sec)") - parser.add_argument("-v", "--verbose", action="store_true", - help="Increase output verbosity") - parser.add_argument("--debug", action="store_true", - help="Debug message output") - parser.add_argument("-q", "--quiet", action="store_true", - help="Run quiet with no message output") - parser.add_argument("-V", "--version", action="version", - version="stocksight v%s" % STOCKSIGHT_VERSION, - help="Prints version and exits") - args = parser.parse_args() - - # set up logging - logger = logging.getLogger('stocksight') - logger.setLevel(logging.INFO) - eslogger = logging.getLogger('elasticsearch') - eslogger.setLevel(logging.WARNING) - requestslogger = logging.getLogger('requests') - requestslogger.setLevel(logging.WARNING) - logging.addLevelName( 
- logging.INFO, "\033[1;32m%s\033[1;0m" - % logging.getLevelName(logging.INFO)) - logging.addLevelName( - logging.WARNING, "\033[1;31m%s\033[1;0m" - % logging.getLevelName(logging.WARNING)) - logging.addLevelName( - logging.ERROR, "\033[1;41m%s\033[1;0m" - % logging.getLevelName(logging.ERROR)) - logging.addLevelName( - logging.DEBUG, "\033[1;33m%s\033[1;0m" - % logging.getLevelName(logging.DEBUG)) - logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' - loglevel = logging.INFO - logging.basicConfig(format=logformatter, level=loglevel) - if args.verbose: - logger.setLevel(logging.INFO) - eslogger.setLevel(logging.INFO) - requestslogger.setLevel(logging.INFO) - if args.debug: - logger.setLevel(logging.DEBUG) - eslogger.setLevel(logging.DEBUG) - requestslogger.setLevel(logging.DEBUG) - if args.quiet: - logger.disabled = True - eslogger.disabled = True - requestslogger.disabled = True - - # print banner - if not args.quiet: - c = randint(1, 4) - if c == 1: - color = '31m' - elif c == 2: - color = '32m' - elif c == 3: - color = '33m' - elif c == 4: - color = '35m' - - banner = """\033[%s - - /$$ /$$ /$$ /$$ /$$ - | $$ | $$ |__/ | $$ | $$ - /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ - /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ - | $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ - \____ $$ | $$ /$$| $$ | $$| $$ | $$_ $$ \____ $$| $$| $$ | $$| $$ | $$ | $$ /$$ - /$$$$$$$/ | $$$$/| $$$$$$/| $$$$$$$| $$ \ $$ /$$$$$$$/| $$| $$$$$$$| $$ | $$ | $$$$/ - |_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ - /$$ \ $$ - :) = +$ :( = -$ | $$$$$$/ - \______/ v%s - \033[0m""" % (color, STOCKSIGHT_VERSION) - print(banner + '\n') - - # set up elasticsearch mappings and create index - mappings = { - "mappings": { - "stock": { - "properties": { - "symbol": { - "type": "keyword" - }, - "price_last": { - "type": "float" - }, - "date": { - 
"type": "date" - }, - "change": { - "type": "float" - }, - "price_high": { - "type": "float" - }, - "price_low": { - "type": "float" - }, - "vol": { - "type": "integer" - } - } - } - } - } - - if args.symbol is None: - print("No stock symbol, see -h for help.") - sys.exit(1) - - if args.delindex: - logger.info('Deleting existing Elasticsearch index ' + args.index) - es.indices.delete(index=args.index, ignore=[400, 404]) - - logger.info('Creating new Elasticsearch index or using existing ' + args.index) - es.indices.create(index=args.index, body=mappings, ignore=[400, 404]) - - # create instance of GetStock - stockprice = GetStock() - - try: - # get stock price - stockprice.get_price(symbol=args.symbol, url=url) - except Exception as e: - logger.warning("Exception: Failed to get stock data caused by: %s" % e) - except KeyboardInterrupt: - print("Ctrl-c keyboard interrupt, exiting...") - sys.exit(0) \ No newline at end of file diff --git a/src/stockprice.py b/src/stockprice.py index cff7ab4..9bb940e 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -6,35 +6,125 @@ for more information. Copyright (C) Chris Park 2018 -Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. 
""" + +import time +import requests +import re import argparse import logging import sys -import time -import datetime -import re -import requests -from pytz import timezone +try: + from elasticsearch5 import Elasticsearch +except ImportError: + from elasticsearch import Elasticsearch +from random import randint -from config import symbols -from StockSight.Initializer.ElasticSearch import es -from StockSight.StockPriceListener import StockPriceListener +# import elasticsearch host +from config import elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password STOCKSIGHT_VERSION = '0.1-b.5' __version__ = STOCKSIGHT_VERSION +# url to fetch stock price from, SYMBOL will be replaced with symbol from cli args +url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" + +# create instance of elasticsearch +es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], + http_auth=(elasticsearch_user, elasticsearch_password)) + +class GetStock: + + def get_price(self, url, symbol): + import re + + while True: + + logger.info("Grabbing stock data for symbol %s..." 
% symbol) + + try: + + # add stock symbol to url + url = re.sub("SYMBOL", symbol, url) + # get stock data (json) from url + try: + r = requests.get(url) + data = r.json() + except (requests.HTTPError, requests.ConnectionError, requests.ConnectTimeout) as re: + logger.error("Exception: exception getting stock data from url caused by %s" % re) + raise + logger.debug(data) + # build dict to store stock info + try: + D = {} + D['symbol'] = symbol + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] + if D['last'] is None: + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] + D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) + try: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 + except TypeError: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 + pass + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] + if D['high'] is None: + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-2] + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-1] + if D['low'] is None: + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-2] + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] + if D['vol'] is None: + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] + logger.debug(D) + except KeyError as e: + logger.error("Exception: exception getting stock data caused by %s" % e) + raise + + # check before adding to ES + if D['last'] is not None and D['high'] is not None and D['low'] is not None: + 
logger.info("Adding stock data to Elasticsearch...") + # add stock price info to elasticsearch + es.index(index=args.index, + doc_type="stock", + body={"symbol": D['symbol'], + "price_last": D['last'], + "date": D['date'], + "change": D['change'], + "price_high": D['high'], + "price_low": D['low'], + "vol": D['vol'] + }) + else: + logger.warning("Some stock data had null values, not adding to Elasticsearch") + + except Exception as e: + logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) + pass + logger.info("Will get stock data again in %s sec..." % args.frequency) + time.sleep(args.frequency) if __name__ == '__main__': # parse cli args parser = argparse.ArgumentParser() - + parser.add_argument("-i", "--index", metavar="INDEX", default="stocksight", + help="Index name for Elasticsearch (default: stocksight)") + parser.add_argument("-d", "--delindex", action="store_true", + help="Delete existing Elasticsearch index first") + parser.add_argument("-s", "--symbol", metavar="SYMBOL", + help="Stock symbol to use, example: TSLA") + parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=120, type=int, + help="How often in seconds to retrieve stock data (default: 120 sec)") parser.add_argument("-v", "--verbose", action="store_true", help="Increase output verbosity") parser.add_argument("--debug", action="store_true", @@ -81,6 +171,34 @@ eslogger.disabled = True requestslogger.disabled = True + # print banner + if not args.quiet: + c = randint(1, 4) + if c == 1: + color = '31m' + elif c == 2: + color = '32m' + elif c == 3: + color = '33m' + elif c == 4: + color = '35m' + + banner = """\033[%s + + /$$ /$$ /$$ /$$ /$$ + | $$ | $$ |__/ | $$ | $$ + /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ + /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ + | $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ + \____ $$ | $$ /$$| $$ | $$| $$ | $$_ $$ 
\____ $$| $$| $$ | $$| $$ | $$ | $$ /$$ + /$$$$$$$/ | $$$$/| $$$$$$/| $$$$$$$| $$ \ $$ /$$$$$$$/| $$| $$$$$$$| $$ | $$ | $$$$/ + |_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ + /$$ \ $$ + :) = +$ :( = -$ | $$$$$$/ + \______/ v%s + \033[0m""" % (color, STOCKSIGHT_VERSION) + print(banner + '\n') + # set up elasticsearch mappings and create index mappings = { "mappings": { @@ -112,22 +230,25 @@ } } - try: - for symbol in symbols: - try: - logger.info('Creating new Elasticsearch index or using existing ' + symbol) - es.indices.create(index=symbol, body=mappings, ignore=[400, 404]) + if args.symbol is None: + print("No stock symbol, see -h for help.") + sys.exit(1) - stockprice = StockPriceListener() + if args.delindex: + logger.info('Deleting existing Elasticsearch index ' + args.index) + es.indices.delete(index=args.index, ignore=[400, 404]) - stockprice.get_price(symbol=symbol) - except Exception as e: - logger.warning("%s" % e) - pass - # get stock price + logger.info('Creating new Elasticsearch index or using existing ' + args.index) + es.indices.create(index=args.index, body=mappings, ignore=[400, 404]) + # create instance of GetStock + stockprice = GetStock() + + try: + # get stock price + stockprice.get_price(symbol=args.symbol, url=url) except Exception as e: logger.warning("Exception: Failed to get stock data caused by: %s" % e) except KeyboardInterrupt: print("Ctrl-c keyboard interrupt, exiting...") - sys.exit(0) + sys.exit(0) \ No newline at end of file diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py index 46d6704..75cec62 100644 --- a/src/tweet.sentiment.py +++ b/src/tweet.sentiment.py @@ -16,6 +16,8 @@ from random import randint from StockSight.TweetListener import * +from StockSight.EsMap.Sentiment import * +from tweepy import API, Stream, OAuthHandler, TweepError STOCKSIGHT_VERSION = '0.1-b.6' @@ -50,80 +52,7 @@ requestslogger.disabled = True - # set up elasticsearch mappings and create index - mappings = 
{ - "mappings": { - "tweet": { - "properties": { - "author": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "location": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "language": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "friends": { - "type": "long" - }, - "followers": { - "type": "long" - }, - "statuses": { - "type": "long" - }, - "date": { - "type": "date" - }, - "message": { - "type": "string", - "fields": { - "english": { - "type": "string", - "analyzer": "english" - }, - "keyword": { - "type": "keyword" - } - } - }, - "tweet_id": { - "type": "long" - }, - "polarity": { - "type": "float" - }, - "subjectivity": { - "type": "float" - }, - "sentiment": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - } - } - } - } - } + #TODO exit if the twitter keys are empty if not consumer_key or \ @@ -136,98 +65,83 @@ try: for symbol in symbols: - try: - logger.info('Creating new Elasticsearch index or using existing ' + symbol) - es.indices.create(index=symbol, body=mappings, ignore=[400, 404]) - - logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) - logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) - logger.info("Scraping tweets for %s from %s ..." 
% (symbol)) - - # create instance of TweetStreamListener - TweetStreamListener = TweetStreamListener(symbol) - - # set twitter keys/tokens - auth = OAuthHandler(consumer_key, consumer_secret) - auth.set_access_token(access_token, access_token_secret) - api = API(auth) - - # create instance of the tweepy stream - stream = Stream(auth, TweetStreamListener) - - logger.info("Looking up Twitter user ids from usernames...") - useridlist = [] - while True: - for u in twitter_feeds: - try: - # get user id from screen name using twitter api - user = api.get_user(screen_name=u) - uid = int(user.id) - if uid not in useridlist: - useridlist.append(uid) - time.sleep(randint(0, 2)) - except TweepError as te: - # sleep a bit in case twitter suspends us - logger.warning("Tweepy exception: twitter api error caused by: %s" % te) - logger.info("Sleeping for a random amount of time and retrying...") - time.sleep(randint(1,10)) - continue - except KeyboardInterrupt: - logger.info("Ctrl-c keyboard interrupt, exiting...") - stream.disconnect() - sys.exit(0) - break - - if len(useridlist) > 0: - logger.info('Writing twitter user ids to text file %s' % TweetStreamListener.twitter_users_file) - try: - f = open(TweetStreamListener.twitter_users_file, "wt", encoding='utf-8') - for i in useridlist: - line = str(i) + "\n" - if type(line) is bytes: - line = line.decode('utf-8') - f.write(line) - f.close() - except (IOError, OSError) as e: - logger.warning("Exception: error writing to file caused by: %s" % e) - pass - except Exception as e: - raise + logger.info('Creating new Elasticsearch index or using existing ' + symbol) + es.indices.create(index="stocksight_"+symbol+"_sentiment", body=mapping, ignore=[400, 404]) + + # create instance of TweetStreamListener + TweetStreamListener = TweetStreamListener() + + # set twitter keys/tokens + auth = OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + api = API(auth) + + # create instance of the 
tweepy stream + stream = Stream(auth, TweetStreamListener) + logger.info("Looking up Twitter user ids from usernames...") + useridlist = [] + useridlist = get_twitter_users_from_file(TweetStreamListener.twitter_users_file) + + if len(useridlist) is 0: + logger.info("Fetching Twitter user ids from Twitter...") + logger.info(str(len(useridlist)) + '---'+str(len(twitter_feeds))) + while True: + for u in twitter_feeds: + try: + # get user id from screen name using twitter api + user = api.get_user(screen_name=u) + uid = int(user.id) + if uid not in useridlist: + useridlist.append(uid) + time.sleep(randint(0, 2)) + except TweepError as te: + # sleep a bit in case twitter suspends us + logger.warning("Tweepy exception: twitter api error caused by: %s" % te) + logger.info("Sleeping for a random amount of time and retrying...") + time.sleep(randint(1,10)) + continue + except KeyboardInterrupt: + logger.info("Ctrl-c keyboard interrupt, exiting...") + stream.disconnect() + sys.exit(0) + break + + if len(useridlist) > 0: + logger.info('Writing twitter user ids to text file %s' % TweetStreamListener.twitter_users_file) try: - # search twitter for keywords - logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) - logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) - logger.info('Twitter Feeds: ' + str(twitter_feeds)) - logger.info('Twitter User Ids: ' + str(useridlist)) - logger.info('Twitter keywords: ' + str(args.keywords)) - logger.info('Listening for Tweets (ctrl-c to exit)...') - if args.keywords is None: - stream.filter(follow= str(useridlist), languages=['en']) - else: - # keywords to search on twitter - # add keywords to list - keywords = args.keywords.split(',') - - if args.index in nltk_tokens_required: - nltk_tokens = nltk_tokens_required[args.index] - else: - nltk_tokens = nltk_tokens_required['default'] - - # add tokens to keywords to list - for f in nltk_tokens: - keywords.append(f.lower()) - stream.filter(track=keywords, languages=['en']) 
- except TweepError as te: - logger.debug("Tweepy Exception: Failed to get tweets caused by: %s" % te) - except KeyboardInterrupt: - print("Ctrl-c keyboard interrupt, exiting...") - stream.disconnect() - sys.exit(0) - - except Exception as e: - logger.warning("%s" % e) - pass + f = open(TweetStreamListener.twitter_users_file, "wt", encoding='utf-8') + for i in useridlist: + line = str(i) + "\n" + if type(line) is bytes: + line = line.decode('utf-8') + f.write(line) + f.close() + except (IOError, OSError) as e: + logger.warning("Exception: error writing to file caused by: %s" % e) + pass + except Exception as e: + raise + + + # search twitter for keywords + logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) + logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) + logger.info('Twitter Feeds: ' + str(twitter_feeds)) + logger.info('Twitter User Ids: ' + str(useridlist)) + logger.info('Listening for Tweets (ctrl-c to exit)...') + + stream.filter(follow=useridlist, languages=['en']) + except TweepError as te: + logger.debug("Tweepy Exception: Failed to get tweets caused by: %s" % te) + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + stream.disconnect() + sys.exit(0) + + except Exception as e: + logger.warning("%s" % e) + pass except KeyboardInterrupt: print("Ctrl-c keyboard interrupt, exiting...") From f3c1895daa008bba80b08483cc2a515136ca21bd Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 2 Sep 2019 14:22:14 -0400 Subject: [PATCH 25/55] Disable twitter sentiment stream in start.sh --- src/config.sample.py | 1 + src/startup.sh | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/config.sample.py b/src/config.sample.py index 9a76417..14bf3ec 100644 --- a/src/config.sample.py +++ b/src/config.sample.py @@ -32,6 +32,7 @@ "@muddywatersre", "@mcuban", "@AswathDamodaran", "@elerianm", "@MorganStanley", "@ianbremmer", "@GoldmanSachs", "@Wu_Tang_Finance", "@Schuldensuehner", "@NorthmanTrader", 
"@Frances_Coppola", "@BuzzFeed","@nytimes"] +min_followers = 1000 #Stock Price fetcher config weekday_start = 1 diff --git a/src/startup.sh b/src/startup.sh index 08f0459..103b8d8 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -14,8 +14,8 @@ tick_time=900 tick=0 let sentiment_time=900*4 -echo "Spawning Tweet Sentiment receiver instance"; -python tweet.sentiment.py & +#echo "Spawning Tweet Sentiment receiver instance"; +#python tweet.sentiment.py & while true do From e6c9f1bf7388181cf6074b842a2cdf575cd054f2 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 2 Sep 2019 14:25:35 -0400 Subject: [PATCH 26/55] Rename original py to og.py --- README.md | 19 +++++++++---------- src/{sentiment.py => sentiment.og.py} | 0 src/{stockprice.py => stockprice.og.py} | 0 3 files changed, 9 insertions(+), 10 deletions(-) rename src/{sentiment.py => sentiment.og.py} (100%) rename src/{stockprice.py => stockprice.og.py} (100%) diff --git a/README.md b/README.md index 1b9ab89..a86495b 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ If you are interested in joining the web site beta sign up, please email c p a r ## Requirements - Python 3. (tested with Python 3.6.5) -- Elasticsearch 5. -- Kibana 5. +- Elasticsearch 7.3.1. +- Kibana 7.3.1. - elasticsearch python module - nltk python module - requests python module @@ -42,6 +42,12 @@ $ cd stocksight Stocksight Kibana dashboard stocksight kibana dashboard +### How to use with DOCKER +- Change config.py +- run docker-compose up +- ??? +- Profit + ## How to use Install python requirements using pip @@ -138,11 +144,4 @@ optional arguments: --debug Debug message output -q, --quiet Run quiet with no message output -V, --version Prints version and exits - ``` - -### HOWTO DOCKER -- Change config.py -- Change startup.sh to include your tickers -- run docker-compose up -- ??? 
-- Profit \ No newline at end of file + ``` \ No newline at end of file diff --git a/src/sentiment.py b/src/sentiment.og.py similarity index 100% rename from src/sentiment.py rename to src/sentiment.og.py diff --git a/src/stockprice.py b/src/stockprice.og.py similarity index 100% rename from src/stockprice.py rename to src/stockprice.og.py From 3fc49a6438b21c397b16bd8e41b5249e7218ea82 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 8 Sep 2019 19:05:36 -0400 Subject: [PATCH 27/55] Change config handling Add test for yahoo new listener Add sh code to listen for elastic search server Add threading support --- .gitignore | 2 +- README.md | 125 +-- README.og.md | 141 +++ python-docker/requirements.txt | 5 +- redis-docker/redis.conf | 7 +- src/{ => Original}/config.sample.py | 33 +- .../sentiment.og.py} | 0 src/Original/stockprice.og.py | 254 ++++++ src/StockSight/EsMap/Sentiment.py | 14 +- src/StockSight/EsMap/StockPrice.py | 3 + src/StockSight/Helper/Sentiment.py | 27 - src/StockSight/Initializer/ConfigReader.py | 12 + src/StockSight/Initializer/ElasticSearch.py | 6 +- src/StockSight/Initializer/Redis.py | 4 +- src/StockSight/Model/Article.py | 10 + src/StockSight/NewsHeadlineListener.py | 103 +-- src/StockSight/SeekAlphaListener.py | 51 ++ src/StockSight/StockPriceListener.py | 33 +- src/StockSight/TweetListener.py | 13 +- src/StockSight/YahooFinanceListener.py | 62 ++ src/config.sample.yml | 62 ++ src/definitions.py | 3 + src/import.kibana.py | 7 +- src/kibana_export/export.7.3.ndjson | 2 +- src/news.sentiment.py | 25 +- src/sentiment.og.py | 802 ------------------ src/startup.sh | 34 +- src/{stockprice.docker.py => stockprice.py} | 19 +- src/twitteruserids.txt | 32 - tests/StockSight/YahooFinanceListenerTest.py | 53 ++ 30 files changed, 830 insertions(+), 1114 deletions(-) create mode 100644 README.og.md rename src/{ => Original}/config.sample.py (53%) rename src/{stockprice.og.py => Original/sentiment.og.py} (100%) create mode 100644 src/Original/stockprice.og.py 
create mode 100644 src/StockSight/Initializer/ConfigReader.py create mode 100644 src/StockSight/Model/Article.py create mode 100644 src/StockSight/SeekAlphaListener.py create mode 100644 src/StockSight/YahooFinanceListener.py create mode 100644 src/config.sample.yml create mode 100644 src/definitions.py delete mode 100644 src/sentiment.og.py rename src/{stockprice.docker.py => stockprice.py} (83%) delete mode 100644 src/twitteruserids.txt create mode 100644 tests/StockSight/YahooFinanceListenerTest.py diff --git a/.gitignore b/.gitignore index 9c3558a..fcdd747 100644 --- a/.gitignore +++ b/.gitignore @@ -100,11 +100,11 @@ ENV/ #Custom files data/ config.py -startup.sh .git .idea twitteruserids.txt *_export.json +config.yml # mypy .mypy_cache/ diff --git a/README.md b/README.md index a86495b..c651f9a 100644 --- a/README.md +++ b/README.md @@ -5,21 +5,22 @@ Crowd-sourced stock analyzer and stock predictor using Elasticsearch, Twitter, N [![License](https://img.shields.io/github/license/shirosaidev/stocksight.svg?label=License&maxAge=86400)](./LICENSE) [![Release](https://img.shields.io/github/release/shirosaidev/stocksight.svg?label=Release&maxAge=60)](https://github.com/shirosaidev/stocksight/releases/latest) + +Original Author (Chris Park) [![Sponsor Patreon](https://img.shields.io/badge/Sponsor%20%24-Patreon-brightgreen.svg)](https://www.patreon.com/shirosaidev) [![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=CLF223XAS4W72) -stocksight diagram +Docker and new features author (Allen Jian Feng Xie) +[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/paypalme2/heyqule) + ## About stocksight is a crowd-sourced stock analysis open source software that uses Elasticsearch to store Twitter and news headlines data for stocks. 
stocksight analyzes the emotions of what the author writes and does sentiment analysis on the text to determine how the author "feels" about a stock. stocksight makes an aggregated analysis of all collected data from all sources. Each user running stocksight has a unique fingerprint: specific stocks they are following, news sites and twitter users they follow to find information for those stocks. This creates a unique sentiment analysis for each user, based on what data sources they are getting stocksight to search. Users can have the same stocks, but their data sources could vary significantly creating different sentiment analysis for the same stock. stocksight website will allow each user to see other sentiment analysis results from other stocksight user app results and a combined aggregated view of all. -## Stocksight web site coming soon -If you are interested in joining the web site beta sign up, please email c p a r k 1 6 @ g m a i l . c o m - ## Requirements -- Python 3. (tested with Python 3.6.5) +- Python 3. (tested with Python 3.6.8 and 3.7.4) - Elasticsearch 7.3.1. - Kibana 7.3.1. - elasticsearch python module @@ -29,6 +30,9 @@ If you are interested in joining the web site beta sign up, please email c p a r - beautifulsoup4 python module - textblob python module - vaderSentiment python module +- pytz +- redis +- pyyaml ### Download @@ -38,110 +42,17 @@ $ cd stocksight ``` [Download latest version](https://github.com/shirosaidev/stocksight/releases/latest) -## Screenshot -Stocksight Kibana dashboard -stocksight kibana dashboard - -### How to use with DOCKER -- Change config.py -- run docker-compose up +### How to setup +- Copy config.sample.py to config.py +- Change the setting in config.py to fit your need +- run "docker-compose up" - ??? - Profit -## How to use - -Install python requirements using pip - -`pip install -r requirements.txt` - -Create a new twitter application and generate your consumer key and access token. 
https://developer.twitter.com/en/docs/basics/developer-portal/guides/apps.html -https://developer.twitter.com/en/docs/basics/authentication/guides/access-tokens.html - -Copy config.py.sample to config.py - -Set elasticsearch settings in config.py for your env - -Add twitter consumer key/access token and secrets to config.py - -Edit config.py and modify NLTK tokens required/ignored and twitter feeds you want to mine. NLTK tokens required are keywords which must be in tweet before adding it to Elasticsearch (whitelist). NLTK tokens ignored are keywords which if are found in tweet, it will not be added to Elasticsearch (blacklist). - -### Examples - -Run sentiment.py to create 'stocksight' index in Elasticsearch and start mining and analyzing Tweets using keywords - -```sh -$ python sentiment.py -k TSLA,'Elon Musk',Musk,Tesla --debug -``` - -Start mining and analyzing Tweets from feeds in config using cached user ids from file - -```sh -$ python sentiment.py -f twitteruserids.txt --debug -``` - -Start mining and analyzing News headlines and following headline links and scraping relevant text on landing page - -```sh -$ python sentiment.py -n TSLA --followlinks --debug -``` - -Run stockprice.py to add stock prices to 'stocksight' index in Elasticsearch - -```sh -$ python stockprice.py -s TSLA --debug -``` - -Load 'stocksight' index in Kibana and import export.json file for visuals/dashboard. - -### CLI options - -``` -usage: sentiment.py [-h] [-i INDEX] [-d] [-k KEYWORDS] [-u URL] [-f FILE] - [-n SYMBOL] [--frequency FREQUENCY] [--followlinks] [-v] - [--debug] [-q] [-V] +### How to use +The following action require to run in to the python3 container. 
+###### Delete Elastic Indexes -optional arguments: - -h, --help show this help message and exit - -i INDEX, --index INDEX - Index name for Elasticsearch (default: stocksight) - -d, --delindex Delete existing Elasticsearch index first - -k KEYWORDS, --keywords KEYWORDS - Use keywords to search for in Tweets instead of feeds. - Separated by comma, case insensitive, spaces are ANDs - commas are ORs. Example: TSLA,'Elon - Musk',Musk,Tesla,SpaceX - -u URL, --url URL Use twitter users from any links in web page at url - -f FILE, --file FILE Use twitter user ids from file - -n SYMBOL, --newsheadlines SYMBOL - Get news headlines instead of Twitter using stock - symbol, example: TSLA - --frequency FREQUENCY - How often in seconds to retrieve news headlines - (default: 120 sec) - --followlinks Follow links on news headlines and scrape relevant - text from landing page - -v, --verbose Increase output verbosity - --debug Debug message output - -q, --quiet Run quiet with no message output - -V, --version Prints version and exits - ``` - - ``` - usage: stockprice.py [-h] [-i INDEX] [-d] [-s SYMBOL] [-f FREQUENCY] [-v] - [--debug] [-q] [-V] +###### Update twitteruserid.txt -optional arguments: - -h, --help show this help message and exit - -i INDEX, --index INDEX - Index name for Elasticsearch (default: stocksight) - -d, --delindex Delete existing Elasticsearch index first - -s SYMBOL, --symbol SYMBOL - Stock symbol to use, example: TSLA - -f FREQUENCY, --frequency FREQUENCY - How often in seconds to retrieve stock data, default: - 120 sec - -v, --verbose Increase output verbosity - --debug Debug message output - -q, --quiet Run quiet with no message output - -V, --version Prints version and exits - ``` \ No newline at end of file +###### Update Kibana Dashboard \ No newline at end of file diff --git a/README.og.md b/README.og.md new file mode 100644 index 0000000..813a9f7 --- /dev/null +++ b/README.og.md @@ -0,0 +1,141 @@ +stocksight + +# stocksight +Crowd-sourced stock 
analyzer and stock predictor using Elasticsearch, Twitter, News headlines and Python natural language processing and sentiment analysis. How much do emotions on Twitter and news headlines affect a stock's price? Let's find out ... + +[![License](https://img.shields.io/github/license/shirosaidev/stocksight.svg?label=License&maxAge=86400)](./LICENSE) +[![Release](https://img.shields.io/github/release/shirosaidev/stocksight.svg?label=Release&maxAge=60)](https://github.com/shirosaidev/stocksight/releases/latest) +[![Sponsor Patreon](https://img.shields.io/badge/Sponsor%20%24-Patreon-brightgreen.svg)](https://www.patreon.com/shirosaidev) +[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=CLF223XAS4W72) + +stocksight diagram + +## About +stocksight is a crowd-sourced stock analysis open source software that uses Elasticsearch to store Twitter and news headlines data for stocks. stocksight analyzes the emotions of what the author writes and does sentiment analysis on the text to determine how the author "feels" about a stock. stocksight makes an aggregated analysis of all collected data from all sources. + +Each user running stocksight has a unique fingerprint: specific stocks they are following, news sites and twitter users they follow to find information for those stocks. This creates a unique sentiment analysis for each user, based on what data sources they are getting stocksight to search. Users can have the same stocks, but their data sources could vary significantly creating different sentiment analysis for the same stock. stocksight website will allow each user to see other sentiment analysis results from other stocksight user app results and a combined aggregated view of all. + +## Stocksight web site coming soon +If you are interested in joining the web site beta sign up, please email c p a r k 1 6 @ g m a i l . c o m + +## Requirements +- Python 3. 
(tested with Python 3.6.5) +- Elasticsearch 5. +- Kibana 5. +- elasticsearch python module +- nltk python module +- requests python module +- tweepy python module +- beautifulsoup4 python module +- textblob python module +- vaderSentiment python module + +### Download + +```shell +$ git clone https://github.com/shirosaidev/stocksight.git +$ cd stocksight +``` +[Download latest version](https://github.com/shirosaidev/stocksight/releases/latest) + +## Screenshot +Stocksight Kibana dashboard +stocksight kibana dashboard + +## How to use + +Install python requirements using pip + +`pip install -r requirements.txt` + +Create a new twitter application and generate your consumer key and access token. https://developer.twitter.com/en/docs/basics/developer-portal/guides/apps.html +https://developer.twitter.com/en/docs/basics/authentication/guides/access-tokens.html + +Copy config.py.sample to config.py + +Set elasticsearch settings in config.py for your env + +Add twitter consumer key/access token and secrets to config.py + +Edit config.py and modify NLTK tokens required/ignored and twitter feeds you want to mine. NLTK tokens required are keywords which must be in tweet before adding it to Elasticsearch (whitelist). NLTK tokens ignored are keywords which if are found in tweet, it will not be added to Elasticsearch (blacklist). 
+ +### Examples + +Run sentiment.py to create 'stocksight' index in Elasticsearch and start mining and analyzing Tweets using keywords + +```sh +$ python sentiment.py -k TSLA,'Elon Musk',Musk,Tesla --debug +``` + +Start mining and analyzing Tweets from feeds in config using cached user ids from file + +```sh +$ python sentiment.py -f twitteruserids.txt --debug +``` + +Start mining and analyzing News headlines and following headline links and scraping relevant text on landing page + +```sh +$ python sentiment.py -n TSLA --followlinks --debug +``` + +Run stockprice.py to add stock prices to 'stocksight' index in Elasticsearch + +```sh +$ python stockprice.py -s TSLA --debug +``` + +Load 'stocksight' index in Kibana and import export.json file for visuals/dashboard. + +### CLI options + +``` +usage: sentiment.py [-h] [-i INDEX] [-d] [-k KEYWORDS] [-u URL] [-f FILE] + [-n SYMBOL] [--frequency FREQUENCY] [--followlinks] [-v] + [--debug] [-q] [-V] + +optional arguments: + -h, --help show this help message and exit + -i INDEX, --index INDEX + Index name for Elasticsearch (default: stocksight) + -d, --delindex Delete existing Elasticsearch index first + -k KEYWORDS, --keywords KEYWORDS + Use keywords to search for in Tweets instead of feeds. + Separated by comma, case insensitive, spaces are ANDs + commas are ORs. 
Example: TSLA,'Elon + Musk',Musk,Tesla,SpaceX + -u URL, --url URL Use twitter users from any links in web page at url + -f FILE, --file FILE Use twitter user ids from file + -n SYMBOL, --newsheadlines SYMBOL + Get news headlines instead of Twitter using stock + symbol, example: TSLA + --frequency FREQUENCY + How often in seconds to retrieve news headlines + (default: 120 sec) + --followlinks Follow links on news headlines and scrape relevant + text from landing page + -v, --verbose Increase output verbosity + --debug Debug message output + -q, --quiet Run quiet with no message output + -V, --version Prints version and exits + ``` + + ``` + usage: stockprice.py [-h] [-i INDEX] [-d] [-s SYMBOL] [-f FREQUENCY] [-v] + [--debug] [-q] [-V] + +optional arguments: + -h, --help show this help message and exit + -i INDEX, --index INDEX + Index name for Elasticsearch (default: stocksight) + -d, --delindex Delete existing Elasticsearch index first + -s SYMBOL, --symbol SYMBOL + Stock symbol to use, example: TSLA + -f FREQUENCY, --frequency FREQUENCY + How often in seconds to retrieve stock data, default: + 120 sec + -v, --verbose Increase output verbosity + --debug Debug message output + -q, --quiet Run quiet with no message output + -V, --version Prints version and exits + ``` \ No newline at end of file diff --git a/python-docker/requirements.txt b/python-docker/requirements.txt index 580cfe6..41882af 100644 --- a/python-docker/requirements.txt +++ b/python-docker/requirements.txt @@ -1,4 +1,4 @@ -elasticsearch>=5.0.0,<6.0.0 +elasticsearch>=7.0.0,<8.0.0 requests nltk tweepy @@ -6,4 +6,5 @@ beautifulsoup4 textblob vaderSentiment pytz -redis \ No newline at end of file +redis +pyyaml \ No newline at end of file diff --git a/redis-docker/redis.conf b/redis-docker/redis.conf index 5eab77e..df8e573 100644 --- a/redis-docker/redis.conf +++ b/redis-docker/redis.conf @@ -215,9 +215,10 @@ always-show-logo yes # # save "" -#save 900 1 -#save 300 10 -#save 60 10000 +save 3600 1 +save 
900 100 +save 300 1000 +save 60 10000 # By default Redis will stop accepting writes if RDB snapshots are enabled # (at least one save point) and the latest background save failed. diff --git a/src/config.sample.py b/src/Original/config.sample.py similarity index 53% rename from src/config.sample.py rename to src/Original/config.sample.py index 14bf3ec..789c954 100644 --- a/src/config.sample.py +++ b/src/Original/config.sample.py @@ -1,29 +1,13 @@ -#Global Config -elasticsearch_host = "elasticsearch" +elasticsearch_host = "localhost" elasticsearch_port = 9200 elasticsearch_user = "" elasticsearch_password = "" - -redis_host = "redis" -redis_port = 6379 - -symbols = ['tsla','amd']; -follow_link = False; - - -#Sentiment Analyizers config -nltk_tokens_required = { - 'default': ("increase","decrease","buying","sold","buy","selling","winning","losing"), - 'tsla': ("tesla", "@tesla", "#tesla", "tsla", "#tsla", "elonmusk", "elon", "musk"), - 'amd': ('amd','ryzen','epyc','radeon','crossfire','threadripper') -} -nltk_tokens_ignored = ("win", "giveaway") - -#Twitter Settings consumer_key = "" consumer_secret = "" access_token = "" access_token_secret = "" +nltk_tokens_required = ("Tesla", "@Tesla", "#Tesla", "tesla", "TSLA", "tsla", "#TSLA", "#tsla", "elonmusk", "Elon", "Musk") +nltk_tokens_ignored = ("win", "Win", "giveaway", "Giveaway") twitter_feeds = ["@elonmusk", "@cnbc", "@benzinga", "@stockwits", "@Newsweek", "@WashingtonPost", "@breakoutstocks", "@bespokeinvest", "@WSJMarkets", "@stephanie_link", "@nytimesbusiness", "@IBDinvestors", @@ -31,12 +15,5 @@ "@Carl_C_Icahn", "@ReformedBroker", "@bespokeinvest", "@stlouisfed", "@muddywatersre", "@mcuban", "@AswathDamodaran", "@elerianm", "@MorganStanley", "@ianbremmer", "@GoldmanSachs", "@Wu_Tang_Finance", - "@Schuldensuehner", "@NorthmanTrader", "@Frances_Coppola", "@BuzzFeed","@nytimes"] -min_followers = 1000 - -#Stock Price fetcher config -weekday_start = 1 -weekday_end = 5 -hour_start = 9 -hour_end = 18 -timezone_str = 
'America/Toronto' \ No newline at end of file + "@Schuldensuehner", "@NorthmanTrader", "@Frances_Coppola", "@bySamRo", + "@BuzzFeed","@nytimes"] \ No newline at end of file diff --git a/src/stockprice.og.py b/src/Original/sentiment.og.py similarity index 100% rename from src/stockprice.og.py rename to src/Original/sentiment.og.py diff --git a/src/Original/stockprice.og.py b/src/Original/stockprice.og.py new file mode 100644 index 0000000..9bb940e --- /dev/null +++ b/src/Original/stockprice.og.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" + +import time +import requests +import re +import argparse +import logging +import sys +try: + from elasticsearch5 import Elasticsearch +except ImportError: + from elasticsearch import Elasticsearch +from random import randint + +# import elasticsearch host +from config import elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password + + +STOCKSIGHT_VERSION = '0.1-b.5' +__version__ = STOCKSIGHT_VERSION + +# url to fetch stock price from, SYMBOL will be replaced with symbol from cli args +url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" + +# create instance of elasticsearch +es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], + http_auth=(elasticsearch_user, elasticsearch_password)) + +class GetStock: + + def get_price(self, url, symbol): + import re + + while True: + + logger.info("Grabbing stock data for symbol %s..." 
% symbol) + + try: + + # add stock symbol to url + url = re.sub("SYMBOL", symbol, url) + # get stock data (json) from url + try: + r = requests.get(url) + data = r.json() + except (requests.HTTPError, requests.ConnectionError, requests.ConnectTimeout) as re: + logger.error("Exception: exception getting stock data from url caused by %s" % re) + raise + logger.debug(data) + # build dict to store stock info + try: + D = {} + D['symbol'] = symbol + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] + if D['last'] is None: + D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] + D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) + try: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 + except TypeError: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 + pass + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] + if D['high'] is None: + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-2] + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-1] + if D['low'] is None: + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-2] + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] + if D['vol'] is None: + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] + logger.debug(D) + except KeyError as e: + logger.error("Exception: exception getting stock data caused by %s" % e) + raise + + # check before adding to ES + if D['last'] is not None and D['high'] is not None and D['low'] is not None: + 
logger.info("Adding stock data to Elasticsearch...") + # add stock price info to elasticsearch + es.index(index=args.index, + doc_type="stock", + body={"symbol": D['symbol'], + "price_last": D['last'], + "date": D['date'], + "change": D['change'], + "price_high": D['high'], + "price_low": D['low'], + "vol": D['vol'] + }) + else: + logger.warning("Some stock data had null values, not adding to Elasticsearch") + + except Exception as e: + logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) + pass + + logger.info("Will get stock data again in %s sec..." % args.frequency) + time.sleep(args.frequency) + + +if __name__ == '__main__': + + # parse cli args + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--index", metavar="INDEX", default="stocksight", + help="Index name for Elasticsearch (default: stocksight)") + parser.add_argument("-d", "--delindex", action="store_true", + help="Delete existing Elasticsearch index first") + parser.add_argument("-s", "--symbol", metavar="SYMBOL", + help="Stock symbol to use, example: TSLA") + parser.add_argument("-f", "--frequency", metavar="FREQUENCY", default=120, type=int, + help="How often in seconds to retrieve stock data (default: 120 sec)") + parser.add_argument("-v", "--verbose", action="store_true", + help="Increase output verbosity") + parser.add_argument("--debug", action="store_true", + help="Debug message output") + parser.add_argument("-q", "--quiet", action="store_true", + help="Run quiet with no message output") + parser.add_argument("-V", "--version", action="version", + version="stocksight v%s" % STOCKSIGHT_VERSION, + help="Prints version and exits") + args = parser.parse_args() + + # set up logging + logger = logging.getLogger('stocksight') + logger.setLevel(logging.INFO) + eslogger = logging.getLogger('elasticsearch') + eslogger.setLevel(logging.WARNING) + requestslogger = logging.getLogger('requests') + requestslogger.setLevel(logging.WARNING) + logging.addLevelName( 
+ logging.INFO, "\033[1;32m%s\033[1;0m" + % logging.getLevelName(logging.INFO)) + logging.addLevelName( + logging.WARNING, "\033[1;31m%s\033[1;0m" + % logging.getLevelName(logging.WARNING)) + logging.addLevelName( + logging.ERROR, "\033[1;41m%s\033[1;0m" + % logging.getLevelName(logging.ERROR)) + logging.addLevelName( + logging.DEBUG, "\033[1;33m%s\033[1;0m" + % logging.getLevelName(logging.DEBUG)) + logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' + loglevel = logging.INFO + logging.basicConfig(format=logformatter, level=loglevel) + if args.verbose: + logger.setLevel(logging.INFO) + eslogger.setLevel(logging.INFO) + requestslogger.setLevel(logging.INFO) + if args.debug: + logger.setLevel(logging.DEBUG) + eslogger.setLevel(logging.DEBUG) + requestslogger.setLevel(logging.DEBUG) + if args.quiet: + logger.disabled = True + eslogger.disabled = True + requestslogger.disabled = True + + # print banner + if not args.quiet: + c = randint(1, 4) + if c == 1: + color = '31m' + elif c == 2: + color = '32m' + elif c == 3: + color = '33m' + elif c == 4: + color = '35m' + + banner = """\033[%s + + /$$ /$$ /$$ /$$ /$$ + | $$ | $$ |__/ | $$ | $$ + /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ + /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ + | $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ + \____ $$ | $$ /$$| $$ | $$| $$ | $$_ $$ \____ $$| $$| $$ | $$| $$ | $$ | $$ /$$ + /$$$$$$$/ | $$$$/| $$$$$$/| $$$$$$$| $$ \ $$ /$$$$$$$/| $$| $$$$$$$| $$ | $$ | $$$$/ + |_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ + /$$ \ $$ + :) = +$ :( = -$ | $$$$$$/ + \______/ v%s + \033[0m""" % (color, STOCKSIGHT_VERSION) + print(banner + '\n') + + # set up elasticsearch mappings and create index + mappings = { + "mappings": { + "stock": { + "properties": { + "symbol": { + "type": "keyword" + }, + "price_last": { + "type": "float" + }, + "date": { + 
"type": "date" + }, + "change": { + "type": "float" + }, + "price_high": { + "type": "float" + }, + "price_low": { + "type": "float" + }, + "vol": { + "type": "integer" + } + } + } + } + } + + if args.symbol is None: + print("No stock symbol, see -h for help.") + sys.exit(1) + + if args.delindex: + logger.info('Deleting existing Elasticsearch index ' + args.index) + es.indices.delete(index=args.index, ignore=[400, 404]) + + logger.info('Creating new Elasticsearch index or using existing ' + args.index) + es.indices.create(index=args.index, body=mappings, ignore=[400, 404]) + + # create instance of GetStock + stockprice = GetStock() + + try: + # get stock price + stockprice.get_price(symbol=args.symbol, url=url) + except Exception as e: + logger.warning("Exception: Failed to get stock data caused by: %s" % e) + except KeyboardInterrupt: + print("Ctrl-c keyboard interrupt, exiting...") + sys.exit(0) \ No newline at end of file diff --git a/src/StockSight/EsMap/Sentiment.py b/src/StockSight/EsMap/Sentiment.py index b7f1008..c55f4eb 100644 --- a/src/StockSight/EsMap/Sentiment.py +++ b/src/StockSight/EsMap/Sentiment.py @@ -5,14 +5,23 @@ "author": { "type": "keyword", }, + "referer_url": { + "type": "keyword", + }, + "url": { + "type": "keyword", + }, "location": { "type": "keyword", }, "date": { "type": "date" }, + "title": { + "type": "text", + }, "message": { - "type": "keyword", + "type": "text", }, "msg_id": { "type": "text" @@ -27,6 +36,9 @@ "type": "keyword", } } + }, + "index" : { + "number_of_replicas" : "0" } } diff --git a/src/StockSight/EsMap/StockPrice.py b/src/StockSight/EsMap/StockPrice.py index d5a87fb..43408be 100644 --- a/src/StockSight/EsMap/StockPrice.py +++ b/src/StockSight/EsMap/StockPrice.py @@ -24,5 +24,8 @@ "type": "integer" } } + }, + "index" : { + "number_of_replicas" : "0" } } \ No newline at end of file diff --git a/src/StockSight/Helper/Sentiment.py b/src/StockSight/Helper/Sentiment.py index 4de6756..aa98e23 100644 --- 
a/src/StockSight/Helper/Sentiment.py +++ b/src/StockSight/Helper/Sentiment.py @@ -21,33 +21,6 @@ from StockSight.Initializer.Logger import * -def get_page_text(url): - - max_paragraphs = 10 - - try: - logger.debug(url) - req = requests.get(url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') - html_p = soup.findAll('p') - - logger.debug(html_p) - - if html_p: - n = 1 - for i in html_p: - if n <= max_paragraphs: - if i.string is not None: - logger.debug(i.string) - yield i.string - n += 1 - - except requests.exceptions.RequestException as re: - logger.warning("Exception: can't crawl web site (%s)" % re) - pass - - def get_sentiment_from_url(text, sentimentURL): payload = {'text': text} diff --git a/src/StockSight/Initializer/ConfigReader.py b/src/StockSight/Initializer/ConfigReader.py new file mode 100644 index 0000000..46c8c0d --- /dev/null +++ b/src/StockSight/Initializer/ConfigReader.py @@ -0,0 +1,12 @@ +import yaml +from definitions import PROJECT_SRC_PATH + +config_file = PROJECT_SRC_PATH+'/config.yml' + +def load_config(config_file): + data = None + with open(config_file) as json_data_file: + data = yaml.load(json_data_file, yaml.FullLoader) + return data + +config = load_config(config_file) diff --git a/src/StockSight/Initializer/ElasticSearch.py b/src/StockSight/Initializer/ElasticSearch.py index 30cb0e7..3360dcb 100644 --- a/src/StockSight/Initializer/ElasticSearch.py +++ b/src/StockSight/Initializer/ElasticSearch.py @@ -16,8 +16,8 @@ except ImportError: from elasticsearch import Elasticsearch -from config import elasticsearch_host, elasticsearch_port, elasticsearch_user, elasticsearch_password +from StockSight.Initializer.ConfigReader import config # create instance of elasticsearch -es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], - http_auth=(elasticsearch_user, elasticsearch_password)) \ No newline at end of file +es = Elasticsearch(hosts=[{'host': config['elasticsearch']['host'], 'port': 
config['elasticsearch']['port']}], + http_auth=(config['elasticsearch']['user'], config['elasticsearch']['password'])) \ No newline at end of file diff --git a/src/StockSight/Initializer/Redis.py b/src/StockSight/Initializer/Redis.py index cdd1b2c..993bf00 100644 --- a/src/StockSight/Initializer/Redis.py +++ b/src/StockSight/Initializer/Redis.py @@ -10,6 +10,6 @@ LICENSE for the full license text. """ import redis -from config import redis_host, redis_port +from StockSight.Initializer.ConfigReader import config -rds = redis.Redis(host=str(redis_host), port=redis_port, db=0) \ No newline at end of file +rds = redis.Redis(host=str(config['redis']['host']), port=config['redis']['port'], db=config['redis']['db']) \ No newline at end of file diff --git a/src/StockSight/Model/Article.py b/src/StockSight/Model/Article.py new file mode 100644 index 0000000..317ea09 --- /dev/null +++ b/src/StockSight/Model/Article.py @@ -0,0 +1,10 @@ +class Article: + + def __init__(self, title, url, body = '', referer_url = ''): + self.title = title + self.body = body + self.url = url + self.referer_url = referer_url + + def __eq__(self, other): + return self.url == other.url and self.title == other.title diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 696179a..cde7344 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -14,32 +14,39 @@ import re from datetime import datetime + import nltk +from abc import ABC, abstractmethod try: import urllib.parse as urlparse except ImportError: import urlparse -from config import * +from StockSight.Initializer.ConfigReader import config from StockSight.Initializer.ElasticSearch import es from StockSight.Initializer.Redis import rds from StockSight.Helper.Sentiment import * +from StockSight.Model.Article import * + -class NewsHeadlineListener: - def __init__(self, symbol,url=None): +class NewsHeadlineListener(ABC): + def __init__(self, type, symbol, 
url=None): self.symbol = symbol self.url = url + self.type = type self.cache_length = 2628000 + self.index_name = config['elasticsearch']['table_prefix']['sentiment']+self.symbol.lower() - new_headlines = self.get_news_headlines(self.url) + def execute(self): + logger.info("Scraping news for %s from %s... Start" % (self.symbol, self.type)) + articles = self.get_news_headlines() # add any new headlines - for htext, htext_url in new_headlines: + for article_obj in articles: - md5_hash = hashlib.md5((htext+htext_url).encode()).hexdigest() - logger.debug("Hash"+md5_hash) + md5_hash = hashlib.md5((article_obj.title + article_obj.url).encode()).hexdigest() if rds.exists(md5_hash) is 0: @@ -47,28 +54,27 @@ def __init__(self, symbol,url=None): # output news data print("\n------------------------------") print("Date: " + datenow) - print("News Headline: " + htext) - print("Location (url): " + htext_url) + print("News Headline: " + article_obj.title) + print("Location (url): " + article_obj.url) # create tokens of words in text using nltk text_for_tokens = re.sub( - r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", htext) + r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", article_obj.title) tokens = nltk.word_tokenize(text_for_tokens.lower()) print("NLTK Tokens: " + str(tokens)) # check ignored tokens from config - for t in nltk_tokens_ignored: + for t in config['sentiment_analyzer']['ignore_words']: if t in tokens: logger.info("Text contains token from ignore list, not adding") rds.set(md5_hash,1,self.cache_length) continue + nltk_tokens = [] + if self.symbol in config['tickers']: + nltk_tokens = config['tickers'][self.symbol] - if self.symbol in nltk_tokens_required: - nltk_tokens = nltk_tokens_required[self.symbol] - else: - nltk_tokens = nltk_tokens_required['default'] # check required tokens from config tokenspass = False @@ -83,63 +89,34 @@ def __init__(self, symbol,url=None): continue # get sentiment values - polarity, subjectivity, sentiment = 
sentiment_analysis(htext) + polarity, subjectivity, sentiment = sentiment_analysis(article_obj.title + "/n" + article_obj.body) logger.info("Adding news headline to elasticsearch") # add news headline data and sentiment info to elasticsearch - es.index(index="stocksight_"+self.symbol+"_sentiment", + es.index(index=self.index_name, doc_type="_doc", - body={"date": datenow, - "location": htext_url, - "message": htext, - "polarity": polarity, + body={ "msg_id": md5_hash, + "date": datenow, + "referer_url": article_obj.refer_url, + "url": article_obj.url, + "title": article_obj.title, + "message": article_obj.body, + "polarity": polarity, "subjectivity": subjectivity, - "sentiment": sentiment}) - rds.set(md5_hash,1,self.cache_length) - - - def get_news_headlines(self, url): + "sentiment": sentiment + }) - latestheadlines = [] - latestheadlines_links = [] - parsed_uri = urlparse.urljoin(url, '/') - - try: - - req = requests.get(url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') - html = soup.findAll('h3') - links = soup.findAll('a') - - logger.debug(html) - logger.debug(links) - - if html: - for i in html: - latestheadlines.append((str(i.next.next.next.next), url)) - logger.debug(latestheadlines) - - if follow_link: - if links: - for i in links: - if '/news/' in i['href']: - l = parsed_uri.rstrip('/') + i['href'] - latestheadlines_links.append(l) - - logger.debug(latestheadlines_links) + rds.set(md5_hash,1,self.cache_length) - logger.info("Following any new links and grabbing text from page...") + logger.info("Scraping news for %s from %s... 
Done" % (self.symbol, self.type)) - for linkurl in latestheadlines_links: - for p in get_page_text(linkurl): - latestheadlines.append((str(p), linkurl)) - logger.debug(latestheadlines) + @abstractmethod + def get_news_headlines(self): + pass - except requests.exceptions.RequestException as re: - logger.warning("Exception: can't crawl web site (%s)" % re) - pass + @abstractmethod + def get_page_text(url): + pass - return latestheadlines diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py new file mode 100644 index 0000000..9a07271 --- /dev/null +++ b/src/StockSight/SeekAlphaListener.py @@ -0,0 +1,51 @@ +from StockSight.NewsHeadlineListener import * + + +class SeekAlphaListener(NewsHeadlineListener): + def __init__(self,symbol): + super.__init__(symbol,"https://finance.yahoo.com/quote/%s/?p=%s" % (symbol, symbol)) + + def get_news_headlines(self): + + latestheadlines = [] + latestheadlines_links = [] + + parsed_uri = urlparse.urljoin(self.url, '/') + + try: + + req = requests.get(self.url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html = soup.findAll('h3') + links = soup.findAll('a') + + if html: + for i in html: + latestheadlines.append((str(i.next.next.next.next), self.url)) + logger.debug(latestheadlines) + + if config['news']['follow_link']: + if links: + for i in links: + if '/news/' in i['href']: + l = parsed_uri.rstrip('/') + i['href'] + latestheadlines_links.append(l) + + logger.debug(latestheadlines_links) + + logger.info("Following any new links and grabbing text from page...") + + for linkurl in latestheadlines_links: + for p in self.get_page_text(linkurl): + latestheadlines.append((str(p), linkurl)) + logger.debug(latestheadlines) + + except requests.exceptions.RequestException as re: + logger.warning("Exception: can't crawl web site (%s)" % re) + pass + + return latestheadlines + + def get_page_text(self): + pass \ No newline at end of file diff --git a/src/StockSight/StockPriceListener.py 
b/src/StockSight/StockPriceListener.py index c4fb7fe..2601289 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -16,25 +16,32 @@ import requests from pytz import timezone -from config import weekday_start, weekday_end, hour_start, hour_end, timezone_str +from StockSight.Initializer.ConfigReader import * from StockSight.Initializer.Logger import logger from StockSight.Initializer.ElasticSearch import es regex = re class StockPriceListener: + def __init__(self): + self.index_name = None def get_price(self, symbol): + + logger.info("Scraping price for %s from Yahoo Finance ..." % (symbol)) + + if self.index_name is None: + self.index_name = config['elasticsearch']['table_prefix']['price']+symbol.lower() + url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" - eastern_timezone = timezone(timezone_str) - if self.isNotLive(eastern_timezone): - today = datetime.datetime.now(eastern_timezone) + current_timezone = timezone(config['stock_price']['timezone_str']) + + if config['stock_price']['time_check'] and self.isNotLive(current_timezone): + today = datetime.datetime.now(current_timezone) logger.info("Stock market is not live. Current time: %s" % today.strftime("%Y-%m-%d %H:%M")) return self; - - logger.info("Grabbing stock data for symbol %s..." 
% symbol) try: @@ -48,7 +55,6 @@ def get_price(self, symbol): except (requests.HTTPError, requests.ConnectionError, requests.ConnectTimeout) as re: logger.error("Exception: exception getting stock data from url caused by %s" % re) raise - logger.debug(data) # build dict to store stock info try: D = {} @@ -75,7 +81,6 @@ def get_price(self, symbol): D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] if D['vol'] is None: D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] - logger.debug(D) except KeyError as e: logger.error("Exception: exception getting stock data caused by %s" % e) raise @@ -84,7 +89,7 @@ def get_price(self, symbol): if D['last'] is not None and D['high'] is not None and D['low'] is not None: logger.info("Adding stock data to Elasticsearch...") # add stock price info to elasticsearch - es.index(index="stocksight_"+symbol+"_price", + es.index(index=self.index_name, doc_type="_doc", body={"symbol": D['symbol'], "price_last": D['last'], @@ -101,15 +106,17 @@ def get_price(self, symbol): logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) pass + logger.info("Scraping price for %s from Yahoo Finance... 
Done" % (symbol)) + return self; def isNotLive(self, timezone): today = datetime.datetime.now(timezone); - if today.weekday() >= weekday_start and \ - today.weekday() <= weekday_end and \ - today.hour >= hour_start and \ - today.hour <= hour_end: + if today.weekday() >= config['stock_price']['weekday_start'] and \ + today.weekday() <= config['stock_price']['weekday_end'] and \ + today.hour >= config['stock_price']['hour_start'] and \ + today.hour <= config['stock_price']['hour_end']: return False; return True; diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index 4f08cd5..173df39 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -17,7 +17,7 @@ import nltk -from config import * +from definitions import * from StockSight.Initializer.ElasticSearch import es from StockSight.Initializer.Redis import rds from StockSight.Helper.Sentiment import * @@ -141,14 +141,17 @@ def on_data(self, data): # add twitter data and sentiment info to elasticsearch es.index(index="stocksight_"+self.symbol+"_sentiment", doc_type="_doc", - body={"author": screen_name, + body={ + "_id": redis_id, + "author": screen_name, "location": location, "date": created_date, + "title": '', "message": text_filtered, - "msg_id": redis_id, "polarity": polarity, "subjectivity": subjectivity, - "sentiment": sentiment}) + "sentiment": sentiment + }) # add tweet_id to cache rds.set(redis_id,1,86400) @@ -193,7 +196,6 @@ def get_twitter_users_from_url(url): if parsed_uri in twitter_urls and "=" not in link and "?" 
not in link: user = link.split('/')[3] twitter_users.append(u'@' + user) - logger.debug(twitter_users) except requests.exceptions.RequestException as re: logger.warning("Requests exception: can't crawl web site caused by: %s" % re) pass @@ -209,7 +211,6 @@ def get_twitter_users_from_file(file): for line in f.readlines(): u = line.strip() twitter_users.append(u) - logger.debug(twitter_users) f.close() except (IOError, OSError) as e: logger.warning("Exception: error opening file caused by: %s" % e) diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py new file mode 100644 index 0000000..458ab3b --- /dev/null +++ b/src/StockSight/YahooFinanceListener.py @@ -0,0 +1,62 @@ +from StockSight.NewsHeadlineListener import * + + +class YahooFinanceListener(NewsHeadlineListener): + def __init__(self,symbol): + super(YahooFinanceListener, self)\ + .__init__("Yahoo Finance", symbol, "https://finance.yahoo.com/quote/%s/?p=%s" % (symbol, symbol)) + + def get_news_headlines(self): + + articles = [] + + parsed_uri = urlparse.urljoin(self.url, '/') + + try: + + req = requests.get(self.url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html = soup.findAll('h3') + + if html: + for rawArticle in html: + + aTag = rawArticle.find('a') + article = Article(aTag.text, aTag.get('href')) + + if config['news']['follow_link']: + new_url = parsed_uri + article.url + for p in self.get_page_text(new_url): + article.body += str(p) + + article.refer_url = self.url + articles.append(article) + + except requests.exceptions.RequestException as re: + logger.warning("Exception: can't crawl web site (%s)" % re) + pass + + return articles + + def get_page_text(self, url): + max_paragraphs = 5 + try: + req = requests.get(url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html_p = soup.findAll('p') + + if html_p: + n = 1 + for i in html_p: + if n <= max_paragraphs: + if i.string is not None: + yield i.string + else: + break + n += 1 + + 
except requests.exceptions.RequestException as re: + logger.warning("Exception: can't crawl web site (%s)" % re) + pass diff --git a/src/config.sample.yml b/src/config.sample.yml new file mode 100644 index 0000000..7597ecd --- /dev/null +++ b/src/config.sample.yml @@ -0,0 +1,62 @@ +elasticsearch: + host : elasticsearch + port : 9200 + user : your_cred + password : your_cred + table_prefix: + sentiment : stocksight_sentiment_ + price : stocksight_price_ + +redis: + host : redis + port : 6379 + db : 0 + +sentiment_analyzer: + ignore_words: + - win + - giveaway + - vs + +news: + follow_link: false + +twitter: + consumer_key : your_cred + consumer_secret : your_cred + access_token : your_cred + access_token_secret : your_cred + min_followers : 1000 + feeds : + - '@elonmusk' + - '@stockwits' + - '@nytimes' + - '@MorganStanley' + - '@GoldmanSachs' + - '@WSJmarkets' + - '@WashingtonPost' + - '@nytimesbusiness' + - '@reutersbiz' + +stock_price: + time_check : true #True, fetch the price only within the below time range + weekday_start : 1 + weekday_end : 5 + hour_start : 9 + hour_end : 18 + timezone_str : America/Toronto + +tickers: + tsla: + - tesla + - tsla + - elonmusk + - elon + - musk + amd: + - amd + - ryzen + - epyc + - radeon + - crossfire + - threadripper \ No newline at end of file diff --git a/src/definitions.py b/src/definitions.py new file mode 100644 index 0000000..9727287 --- /dev/null +++ b/src/definitions.py @@ -0,0 +1,3 @@ +from pathlib import Path + +PROJECT_SRC_PATH = str(Path(__file__).parent) diff --git a/src/import.kibana.py b/src/import.kibana.py index 2026800..9e664e6 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -11,8 +11,7 @@ """ import requests import sys -import os.path -from config import symbols +from StockSight.Initializer.ConfigReader import * if __name__ == '__main__': @@ -21,7 +20,7 @@ import_template = template_file.read() template_file.close() - for symbol in symbols: + for symbol in config['tickers']: try: 
ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' ndjson_file = open(ndjson_file_path, "wt", encoding='utf-8') @@ -43,4 +42,4 @@ except KeyboardInterrupt: print("Ctrl-c keyboard interrupt, exiting...") - sys.exit(0) \ No newline at end of file + sys.exit(0) diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index 94aea5e..06b342c 100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,4 +1,4 @@ -{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"a
ggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_tmpl_*"},"id":"tmpl_index_pattern","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-08-31T04:11:15.828Z","version":"WzcsMV0="} 
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"sentiment\",\"type\":\"string\",\"e
sTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl_index_pattern","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-08-31T04:11:15.828Z","version":"WzcsMV0="} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_polarity","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to 
Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":32}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50],\"customLabel\":\"\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}]}"},"id":"tmpl_polarity_visual","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:17:03.654Z","version":"WzgsMV0="} 
{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_sentinel","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"isDonut\":false,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl_polarity_sentinel","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:19:36.071Z","version":"WzksMV0="} 
{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_articles","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_articles\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}"},"id":"tmpl_articles","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:20:24.604Z","version":"WzEwLDFd"} diff --git a/src/news.sentiment.py b/src/news.sentiment.py index 7243138..8f08a5c 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -13,8 +13,12 @@ import argparse import sys +import threading +import time +from random import randint -from StockSight.NewsHeadlineListener import * +from StockSight.YahooFinanceListener import * +from StockSight.SeekAlphaListener import * from 
StockSight.EsMap.Sentiment import * @@ -50,18 +54,19 @@ requestslogger.disabled = True try: - for symbol in symbols: + for symbol in config['tickers']: try: - logger.info('Creating new Elasticsearch index or using existing ' + symbol) - es.indices.create(index="stocksight_"+symbol+"_sentiment", body=mapping, ignore=[400, 404]) - url = "https://finance.yahoo.com/quote/%s/?p=%s" % (symbol, symbol) + logger.info('Creating new Sentiment index or using existing ' + symbol) + es.indices.create(index=config['elasticsearch']['table_prefix']['sentiment']+symbol.lower(), body=mapping, ignore=[400, 404]) - logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) - logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) - logger.info("Scraping news for %s from %s ..." % (symbol, url)) + logger.info('NLTK tokens required: ' + str(config['tickers'][symbol])) + logger.info('NLTK tokens ignored: ' + str(config['sentiment_analyzer']['ignore_words'])) - # create instance of NewsHeadlineListener - newslistener = NewsHeadlineListener(symbol, url) + yahooListener = YahooFinanceListener(symbol) + yahooThread = threading.Thread(target=yahooListener.execute) + yahooThread.start() + + time.sleep(randint(5,15)) except Exception as e: logger.warning("%s" % e) pass diff --git a/src/sentiment.og.py b/src/sentiment.og.py deleted file mode 100644 index c6d6c9a..0000000 --- a/src/sentiment.og.py +++ /dev/null @@ -1,802 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -"""sentiment.py - analyze tweets on Twitter and add -relevant tweets and their sentiment values to -Elasticsearch. -See README.md or https://github.com/shirosaidev/stocksight -for more information. - -Copyright (C) Chris Park 2018 -stocksight is released under the Apache 2.0 license. See -LICENSE for the full license text. 
-""" - -import sys -import json -import time -import re -import unicodedata -import requests -import nltk -import argparse -import logging -try: - import urllib.parse as urlparse -except ImportError: - import urlparse -from tweepy.streaming import StreamListener -from tweepy import API, Stream, OAuthHandler, TweepError -from textblob import TextBlob -from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer -from bs4 import BeautifulSoup -try: - from elasticsearch5 import Elasticsearch -except ImportError: - from elasticsearch import Elasticsearch -from random import randint -from datetime import datetime - -# import elasticsearch host, twitter keys and tokens -from config import * - - -STOCKSIGHT_VERSION = '0.1-b.6' -__version__ = STOCKSIGHT_VERSION - -IS_PY3 = sys.version_info >= (3, 0) - -if IS_PY3: - unicode = str - -# create instance of elasticsearch -es = Elasticsearch(hosts=[{'host': elasticsearch_host, 'port': elasticsearch_port}], - http_auth=(elasticsearch_user, elasticsearch_password)) - -# sentiment text-processing url -sentimentURL = 'http://text-processing.com/api/sentiment/' - -# tweet id list -tweet_ids = [] - -# file to hold twitter user ids -twitter_users_file = './twitteruserids.txt' - - -class TweetStreamListener(StreamListener): - - # on success - def on_data(self, data): - - try: - # decode json - dict_data = json.loads(data) - - logger.debug(dict_data) - - # clean up tweet text - #text = unicodedata.normalize( - # 'NFKD', dict_data["text"]).encode('ascii', 'ignore') - text = dict_data["text"] - if text is None: - logger.info("Tweet has no relevant text, skipping") - return True - - # grab html links from tweet - #tweet_urls = re.search("http\S+", text) - - # clean up tweet text more - text = text.replace("\n", " ") - text = re.sub(r"http\S+", "", text) - text = re.sub(r"&.*?;", "", text) - text = re.sub(r"<.*?>", "", text) - text = text.replace("RT", "") - text = text.replace(u"…", "") - text = text.strip() - - # get date when 
tweet was created - created_date = time.strftime( - '%Y-%m-%dT%H:%M:%S', time.strptime(dict_data['created_at'], '%a %b %d %H:%M:%S +0000 %Y')) - - # store dict_data into vars - screen_name = str(dict_data.get("user", {}).get("screen_name")) - location = str(dict_data.get("user", {}).get("location")) - language = str(dict_data.get("user", {}).get("lang")) - friends = int(dict_data.get("user", {}).get("friends_count")) - followers = int(dict_data.get("user", {}).get("followers_count")) - statuses = int(dict_data.get("user", {}).get("statuses_count")) - text_filtered = str(text) - tweetid = int(dict_data.get("id")) - text_raw = unicode(dict_data.get("text")) - - # output twitter data - print("\n------------------------------") - print("Tweet Date: " + created_date) - print("Screen Name: " + screen_name) - print("Location: " + location) - print("Language: " + language) - print("Friends: " + str(friends)) - print("Followers: " + str(followers)) - print("Statuses: " + str(statuses)) - print("Tweet ID: " + str(tweetid)) - print("Tweet Raw Text: " + text_raw) - print("Tweet Filtered Text: " + text_filtered) - - # create tokens of words in text using nltk - text_for_tokens = re.sub( - r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", text_filtered) - tokens = nltk.word_tokenize(text_for_tokens) - print("NLTK Tokens: " + str(tokens)) - - # do some checks before adding to elasticsearch and crawling urls in tweet - if friends == 0 or \ - followers == 0 or \ - statuses == 0 or \ - text == "" or \ - tweetid in tweet_ids: - logger.info("Tweet doesn't meet min requirements, not adding") - return True - - # check ignored tokens from config - for t in nltk_tokens_ignored: - if t in tokens: - logger.info("Tweet contains token from ignore list, not adding") - return True - # check required tokens from config - tokenspass = False - for t in nltk_tokens_required: - if t in tokens: - tokenspass = True - break - if not tokenspass: - logger.info("Tweet does not contain token from 
required list, not adding") - return True - - # strip out hashtags for language processing - tweet = re.sub(r"[#|@|\$]\S+", "", text) - tweet.strip() - - # get sentiment values - polarity, subjectivity, sentiment = sentiment_analysis(tweet) - - # add tweet_id to list - tweet_ids.append(dict_data["id"]) - - # remove hashtags for elasticsearch - #text_filtered = re.sub(r"[#|@|\$]\S+", "", text_filtered) - - logger.info("Adding tweet to elasticsearch") - # add twitter data and sentiment info to elasticsearch - es.index(index=args.index, - doc_type="tweet", - body={"author": screen_name, - "location": location, - "language": language, - "friends": friends, - "followers": followers, - "statuses": statuses, - "date": created_date, - "message": text_filtered, - "tweet_id": tweetid, - "polarity": polarity, - "subjectivity": subjectivity, - "sentiment": sentiment}) - - return True - - except Exception as e: - logger.warning("Exception: exception caused by: %s" % e) - raise - - # on failure - def on_error(self, status_code): - logger.error("Got an error with status code: %s" % status_code) - return True - - # on timeout - def on_timeout(self): - logger.warning("Timeout...") - return True - - -class NewsHeadlineListener: - def __init__(self, url=None, frequency=120): - self.url = url - self.headlines = [] - self.followedlinks = [] - self.frequency = frequency - - while True: - new_headlines = self.get_news_headlines(self.url) - - # add any new headlines - for htext, htext_url in new_headlines: - if htext not in self.headlines: - self.headlines.append(htext) - - datenow = datetime.utcnow().isoformat() - # output news data - print("\n------------------------------") - print("Date: " + datenow) - print("News Headline: " + htext) - print("Location (url): " + htext_url) - - # create tokens of words in text using nltk - text_for_tokens = re.sub( - r"[\%|\$|\.|\,|\!|\:|\@]|\(|\)|\#|\+|(``)|('')|\?|\-", "", htext) - tokens = nltk.word_tokenize(text_for_tokens) - print("NLTK Tokens: " 
+ str(tokens)) - - # check ignored tokens from config - for t in nltk_tokens_ignored: - if t in tokens: - logger.info("Text contains token from ignore list, not adding") - continue - # check required tokens from config - tokenspass = False - for t in nltk_tokens_required: - if t in tokens: - tokenspass = True - break - if not tokenspass: - logger.info("Text does not contain token from required list, not adding") - continue - - # get sentiment values - polarity, subjectivity, sentiment = sentiment_analysis(htext) - - logger.info("Adding news headline to elasticsearch") - # add news headline data and sentiment info to elasticsearch - es.index(index=args.index, - doc_type="newsheadline", - body={"date": datenow, - "location": htext_url, - "message": htext, - "polarity": polarity, - "subjectivity": subjectivity, - "sentiment": sentiment}) - - logger.info("Will get news headlines again in %s sec..." % self.frequency) - time.sleep(self.frequency) - - def get_news_headlines(self, url): - - latestheadlines = [] - latestheadlines_links = [] - parsed_uri = urlparse.urljoin(url, '/') - - try: - - req = requests.get(url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') - html = soup.findAll('h3') - links = soup.findAll('a') - - logger.debug(html) - logger.debug(links) - - if html: - for i in html: - latestheadlines.append((i.next.next.next.next, url)) - logger.debug(latestheadlines) - - if args.followlinks: - if links: - for i in links: - if '/news/' in i['href']: - l = parsed_uri.rstrip('/') + i['href'] - if l not in self.followedlinks: - latestheadlines_links.append(l) - self.followedlinks.append(l) - logger.debug(latestheadlines_links) - - logger.info("Following any new links and grabbing text from page...") - - for linkurl in latestheadlines_links: - for p in get_page_text(linkurl): - latestheadlines.append((p, linkurl)) - logger.debug(latestheadlines) - - except requests.exceptions.RequestException as re: - logger.warning("Exception: can't crawl web site 
(%s)" % re) - pass - - return latestheadlines - - -def get_page_text(url): - - max_paragraphs = 10 - - try: - logger.debug(url) - req = requests.get(url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') - html_p = soup.findAll('p') - - logger.debug(html_p) - - if html_p: - n = 1 - for i in html_p: - if n <= max_paragraphs: - if i.string is not None: - logger.debug(i.string) - yield i.string - n += 1 - - except requests.exceptions.RequestException as re: - logger.warning("Exception: can't crawl web site (%s)" % re) - pass - - -def get_sentiment_from_url(text, sentimentURL): - payload = {'text': text} - - try: - post = requests.post(sentimentURL, data=payload) - logger.debug(post.status_code) - logger.debug(post.text) - except requests.exceptions.RequestException as re: - logger.error("Exception: requests exception getting sentiment from url caused by %s" % re) - raise - - # return None if we are getting throttled or other connection problem - if post.status_code != 200: - logger.warning("Can't get sentiment from url caused by %s %s" % (post.status_code, post.text)) - return None - - response = post.json() - logger.debug(response) - - # neg = response['probability']['neg'] - # neutral = response['probability']['neutral'] - # pos = response['probability']['pos'] - label = response['label'] - - # determine if sentiment is positive, negative, or neutral - if label == "neg": - sentiment = "negative" - elif label == "neutral": - sentiment = "neutral" - else: - sentiment = "positive" - - return sentiment - - -def sentiment_analysis(text): - """Determine if sentiment is positive, negative, or neutral - algorithm to figure out if sentiment is positive, negative or neutral - uses sentiment polarity from TextBlob, VADER Sentiment and - sentiment from text-processing URL - could be made better :) - """ - - # pass text into sentiment url - sentiment_url = get_sentiment_from_url(text, sentimentURL) - - # pass text into TextBlob - text_tb = TextBlob(text) - - # pass 
text into VADER Sentiment - analyzer = SentimentIntensityAnalyzer() - text_vs = analyzer.polarity_scores(text) - - if sentiment_url is None: - if text_tb.sentiment.polarity <= 0 and text_vs['compound'] <= -0.5: - sentiment = "negative" # very negative - elif text_tb.sentiment.polarity <= 0 and text_vs['compound'] <= -0.1: - sentiment = "negative" # somewhat negative - elif text_tb.sentiment.polarity == 0 and text_vs['compound'] > -0.1 and text_vs['compound'] < 0.1: - sentiment = "neutral" - elif text_tb.sentiment.polarity >= 0 and text_vs['compound'] >= 0.1: - sentiment = "positive" # somewhat positive - elif text_tb.sentiment.polarity > 0 and text_vs['compound'] >= 0.1: - sentiment = "positive" # very positive - else: - sentiment = "neutral" - else: - if text_tb.sentiment.polarity < 0 and text_vs['compound'] <= -0.1 and sentiment_url == "negative": - sentiment = "negative" # very negative - elif text_tb.sentiment.polarity <= 0 and text_vs['compound'] < 0 and sentiment_url == "neutral": - sentiment = "negative" # somewhat negative - elif text_tb.sentiment.polarity >= 0 and text_vs['compound'] > 0 and sentiment_url == "neutral": - sentiment = "positive" # somewhat positive - elif text_tb.sentiment.polarity > 0 and text_vs['compound'] >= 0.1 and sentiment_url == "positive": - sentiment = "positive" # very positive - else: - sentiment = "neutral" - - # calculate average polarity from TextBlob and VADER - polarity = (text_tb.sentiment.polarity + text_vs['compound']) / 2 - # output sentiment polarity - print("Sentiment Polarity: " + str(polarity)) - - # output sentiment subjectivity (TextBlob) - print("Sentiment Subjectivity: " + str(text_tb.sentiment.subjectivity)) - - # output sentiment - print("Sentiment (url): " + str(sentiment_url)) - print("Sentiment (algorithm): " + str(sentiment)) - - return polarity, text_tb.sentiment.subjectivity, sentiment - - -def get_twitter_users_from_url(url): - twitter_users = [] - logger.info("Grabbing any twitter users from url %s" % 
url) - try: - twitter_urls = ("http://twitter.com/", "http://www.twitter.com/", - "https://twitter.com/", "https://www.twitter.com/") - # req_header = {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Safari/604.1.38"} - req = requests.get(url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') - html_links = [] - for link in soup.findAll('a'): - html_links.append(link.get('href')) - if html_links: - for link in html_links: - # check if twitter_url in link - parsed_uri = urlparse.urljoin(link, '/') - # get twitter user name from link and add to list - if parsed_uri in twitter_urls and "=" not in link and "?" not in link: - user = link.split('/')[3] - twitter_users.append(u'@' + user) - logger.debug(twitter_users) - except requests.exceptions.RequestException as re: - logger.warning("Requests exception: can't crawl web site caused by: %s" % re) - pass - return twitter_users - - -def get_twitter_users_from_file(file): - # get twitter user ids from text file - twitter_users = [] - logger.info("Grabbing any twitter user ids from file %s" % file) - try: - f = open(file, "rt", encoding='utf-8') - for line in f.readlines(): - u = line.strip() - twitter_users.append(u) - logger.debug(twitter_users) - f.close() - except (IOError, OSError) as e: - logger.warning("Exception: error opening file caused by: %s" % e) - pass - return twitter_users - - -if __name__ == '__main__': - # parse cli args - parser = argparse.ArgumentParser() - parser.add_argument("-i", "--index", metavar="INDEX", default="stocksight", - help="Index name for Elasticsearch (default: stocksight)") - parser.add_argument("-d", "--delindex", action="store_true", - help="Delete existing Elasticsearch index first") - parser.add_argument("-k", "--keywords", metavar="KEYWORDS", - help="Use keywords to search for in Tweets instead of feeds. " - "Separated by comma, case insensitive, spaces are ANDs commas are ORs. 
" - "Example: TSLA,'Elon Musk',Musk,Tesla,SpaceX") - parser.add_argument("-u", "--url", metavar="URL", - help="Use twitter users from any links in web page at url") - parser.add_argument("-f", "--file", metavar="FILE", - help="Use twitter user ids from file") - parser.add_argument("-n", "--newsheadlines", metavar="SYMBOL", - help="Get news headlines instead of Twitter using stock symbol, example: TSLA") - parser.add_argument("--frequency", metavar="FREQUENCY", default=120, type=int, - help="How often in seconds to retrieve news headlines (default: 120 sec)") - parser.add_argument("--followlinks", action="store_true", - help="Follow links on news headlines and scrape relevant text from landing page") - parser.add_argument("-v", "--verbose", action="store_true", - help="Increase output verbosity") - parser.add_argument("--debug", action="store_true", - help="Debug message output") - parser.add_argument("-q", "--quiet", action="store_true", - help="Run quiet with no message output") - parser.add_argument("-V", "--version", action="version", - version="stocksight v%s" % STOCKSIGHT_VERSION, - help="Prints version and exits") - args = parser.parse_args() - - # set up logging - logger = logging.getLogger('stocksight') - logger.setLevel(logging.INFO) - eslogger = logging.getLogger('elasticsearch') - eslogger.setLevel(logging.WARNING) - tweepylogger = logging.getLogger('tweepy') - tweepylogger.setLevel(logging.INFO) - requestslogger = logging.getLogger('requests') - requestslogger.setLevel(logging.INFO) - logging.addLevelName( - logging.INFO, "\033[1;32m%s\033[1;0m" - % logging.getLevelName(logging.INFO)) - logging.addLevelName( - logging.WARNING, "\033[1;31m%s\033[1;0m" - % logging.getLevelName(logging.WARNING)) - logging.addLevelName( - logging.ERROR, "\033[1;41m%s\033[1;0m" - % logging.getLevelName(logging.ERROR)) - logging.addLevelName( - logging.DEBUG, "\033[1;33m%s\033[1;0m" - % logging.getLevelName(logging.DEBUG)) - logformatter = '%(asctime)s 
[%(levelname)s][%(name)s] %(message)s' - loglevel = logging.INFO - logging.basicConfig(format=logformatter, level=loglevel) - if args.verbose: - logger.setLevel(logging.INFO) - eslogger.setLevel(logging.INFO) - tweepylogger.setLevel(logging.INFO) - requestslogger.setLevel(logging.INFO) - if args.debug: - logger.setLevel(logging.DEBUG) - eslogger.setLevel(logging.DEBUG) - tweepylogger.setLevel(logging.DEBUG) - requestslogger.setLevel(logging.DEBUG) - if args.quiet: - logger.disabled = True - eslogger.disabled = True - tweepylogger.disabled = True - requestslogger.disabled = True - - # print banner - if not args.quiet: - c = randint(1, 4) - if c == 1: - color = '31m' - elif c == 2: - color = '32m' - elif c == 3: - color = '33m' - elif c == 4: - color = '35m' - - banner = """\033[%s - - /$$ /$$ /$$ /$$ /$$ - | $$ | $$ |__/ | $$ | $$ - /$$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$| $$ /$$ /$$$$$$$ /$$ /$$$$$$ | $$$$$$$ /$$$$$$ - /$$_____/|_ $$_/ /$$__ $$ /$$_____/| $$ /$$/ /$$_____/| $$ /$$__ $$| $$__ $$|_ $$_/ -| $$$$$$ | $$ | $$ \ $$| $$ | $$$$$$/ | $$$$$$ | $$| $$ \ $$| $$ \ $$ | $$ - \____ $$ | $$ /$$| $$ | $$| $$ | $$_ $$ \____ $$| $$| $$ | $$| $$ | $$ | $$ /$$ - /$$$$$$$/ | $$$$/| $$$$$$/| $$$$$$$| $$ \ $$ /$$$$$$$/| $$| $$$$$$$| $$ | $$ | $$$$/ -|_______/ \___/ \______/ \_______/|__/ \__/|_______/ |__/ \____ $$|__/ |__/ \___/ - /$$ \ $$ - :) = +$ :( = -$ | $$$$$$/ - \______/ v%s - \033[0m""" % (color, STOCKSIGHT_VERSION) - print(banner + '\n') - - # set up elasticsearch mappings and create index - mappings = { - "mappings": { - "tweet": { - "properties": { - "author": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "location": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "language": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "friends": { - "type": "long" - }, - "followers": { - "type": "long" - }, - "statuses": { - "type": "long" - }, - "date": { - 
"type": "date" - }, - "message": { - "type": "string", - "fields": { - "english": { - "type": "string", - "analyzer": "english" - }, - "keyword": { - "type": "keyword" - } - } - }, - "tweet_id": { - "type": "long" - }, - "polarity": { - "type": "float" - }, - "subjectivity": { - "type": "float" - }, - "sentiment": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - } - } - }, - "newsheadline": { - "properties": { - "date": { - "type": "date" - }, - "location": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - }, - "message": { - "type": "string", - "fields": { - "english": { - "type": "string", - "analyzer": "english" - }, - "keyword": { - "type": "keyword" - } - } - }, - "polarity": { - "type": "float" - }, - "subjectivity": { - "type": "float" - }, - "sentiment": { - "type": "string", - "fields": { - "keyword": { - "type": "keyword" - } - } - } - } - } - } - } - if args.delindex: - logger.info('Deleting existing Elasticsearch index ' + args.index) - es.indices.delete(index=args.index, ignore=[400, 404]) - - logger.info('Creating new Elasticsearch index or using existing ' + args.index) - es.indices.create(index=args.index, body=mappings, ignore=[400, 404]) - - # are we grabbing news headlines from yahoo finance or twitter - if args.newsheadlines: - try: - url = "https://finance.yahoo.com/quote/%s/?p=%s" % (args.newsheadlines, args.newsheadlines) - - logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) - logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) - logger.info("Scraping news for %s from %s ..." 
% (args.newsheadlines, url)) - - # create instance of NewsHeadlineListener - newslistener = NewsHeadlineListener(url, args.frequency) - except KeyboardInterrupt: - print("Ctrl-c keyboard interrupt, exiting...") - sys.exit(0) - - else: - # create instance of the tweepy tweet stream listener - tweetlistener = TweetStreamListener() - - # set twitter keys/tokens - auth = OAuthHandler(consumer_key, consumer_secret) - auth.set_access_token(access_token, access_token_secret) - api = API(auth) - - # create instance of the tweepy stream - stream = Stream(auth, tweetlistener) - - # grab any twitter users from links in web page at url - if args.url: - twitter_users = get_twitter_users_from_url(args.url) - if len(twitter_users) > 0: - twitter_feeds = twitter_users - else: - logger.info("No twitter users found in links on web page, exiting") - sys.exit(1) - - # grab twitter users from file - if args.file: - twitter_users = get_twitter_users_from_file(args.file) - if len(twitter_users) > 0: - useridlist = twitter_users - else: - logger.info("No twitter users found in file, exiting") - sys.exit(1) - else: - # build user id list from user names - logger.info("Looking up Twitter user ids from usernames...") - useridlist = [] - while True: - for u in twitter_feeds: - try: - # get user id from screen name using twitter api - user = api.get_user(screen_name=u) - uid = int(user.id) - if uid not in useridlist: - useridlist.append(uid) - time.sleep(randint(0, 2)) - except TweepError as te: - # sleep a bit in case twitter suspends us - logger.warning("Tweepy exception: twitter api error caused by: %s" % te) - logger.info("Sleeping for a random amount of time and retrying...") - time.sleep(randint(1,10)) - continue - except KeyboardInterrupt: - logger.info("Ctrl-c keyboard interrupt, exiting...") - stream.disconnect() - sys.exit(0) - break - - if len(useridlist) > 0: - logger.info('Writing twitter user ids to text file %s' % twitter_users_file) - try: - f = open(twitter_users_file, "wt", 
encoding='utf-8') - for i in useridlist: - line = str(i) + "\n" - if type(line) is bytes: - line = line.decode('utf-8') - f.write(line) - f.close() - except (IOError, OSError) as e: - logger.warning("Exception: error writing to file caused by: %s" % e) - pass - except Exception as e: - raise - - try: - # search twitter for keywords - logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) - logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) - logger.info('Twitter Feeds: ' + str(twitter_feeds)) - logger.info('Twitter User Ids: ' + str(useridlist)) - logger.info('Twitter keywords: ' + str(args.keywords)) - logger.info('Listening for Tweets (ctrl-c to exit)...') - if args.keywords is None: - stream.filter(follow=useridlist, languages=['en']) - else: - # keywords to search on twitter - # add keywords to list - keywords = args.keywords.split(',') - # add tokens to keywords to list - for f in nltk_tokens_required: - keywords.append(f) - stream.filter(track=keywords, languages=['en']) - except TweepError as te: - logger.debug("Tweepy Exception: Failed to get tweets caused by: %s" % te) - except KeyboardInterrupt: - print("Ctrl-c keyboard interrupt, exiting...") - stream.disconnect() - sys.exit(0) \ No newline at end of file diff --git a/src/startup.sh b/src/startup.sh index 103b8d8..916f464 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -4,8 +4,36 @@ #stocksight is released under the Apache 2.0 license. See #LICENSE for the full license text. -echo "Waiting for other dependent instances to spawn... (30 seconds)" -sleep 30; + +#Wait script based on https://github.com/elastic/elasticsearch-py/issues/778#issuecomment-384389668 +host='http://elasticsearch:9200' + +until $(curl --output /dev/null --silent --head --fail "$host"); do + printf '.' + sleep 5 +done + +# First wait for ES to start... 
+response=$(curl $host) + +until [ "$response" = "200" ]; do + response=$(curl --write-out %{http_code} --silent --output /dev/null "$host") + >&2 echo "Elastic Search is unavailable - sleeping" + sleep 5 +done + + +# next wait for ES status to turn to Green +health="$(curl -fsSL "$host/_cat/health?h=status")" +health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" # trim whitespace (otherwise we'll have "green ") + +until [ "$health" = 'green' ] || [ "$health" = 'yellow' ]; do + health="$(curl -fsSL "$host/_cat/health?h=status")" + health="$(echo "$health" | sed -r 's/^[[:space:]]+|[[:space:]]+$//g')" # trim whitespace (otherwise we'll have "green ") + >&2 echo "Elastic Search is not healthy." + sleep 5 +done + echo "Copy kibana dashboard if they don't exist"; python import.kibana.py @@ -20,7 +48,7 @@ let sentiment_time=900*4 while true do echo "Spawning stock price receiver instance"; - python stockprice.docker.py & + python stockprice.py & echo "Will get stock data again in ${tick_time} sec..."; let tick_mod=tick%4 diff --git a/src/stockprice.docker.py b/src/stockprice.py similarity index 83% rename from src/stockprice.docker.py rename to src/stockprice.py index 7ba3d15..0620f4a 100644 --- a/src/stockprice.docker.py +++ b/src/stockprice.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.docker.py - get stock price from Yahoo and add to +"""stockprice.py - get stock price from News sources and add to Elasticsearch. See README.md or https://github.com/shirosaidev/stocksight for more information. 
@@ -13,8 +13,11 @@ import argparse import logging import sys +import threading +import time +from random import randint -from config import symbols +from StockSight.Initializer.ConfigReader import * from StockSight.Initializer.ElasticSearch import es from StockSight.EsMap.StockPrice import mapping from StockSight.StockPriceListener import StockPriceListener @@ -78,14 +81,18 @@ requestslogger.disabled = True try: - for symbol in symbols: + for symbol in config['tickers']: try: - logger.info('Creating new Elasticsearch index or using existing ' + symbol) - es.indices.create(index="stocksight_"+symbol+"_price", body=mapping, ignore=[400, 404]) + logger.info('Creating new Price index or using existing ' + symbol) + es.indices.create(index=config['elasticsearch']['table_prefix']['sentiment']+symbol.lower(), body=mapping, ignore=[400, 404]) stockprice = StockPriceListener() - stockprice.get_price(symbol=symbol) + priceThread = threading.Thread(target=stockprice.get_price,args=(symbol,)) + priceThread.start() + + time.sleep(randint(5,15)) + except Exception as e: logger.warning("%s" % e) pass diff --git a/src/twitteruserids.txt b/src/twitteruserids.txt deleted file mode 100644 index 3122fce..0000000 --- a/src/twitteruserids.txt +++ /dev/null @@ -1,32 +0,0 @@ -44196397 -20402945 -44060322 -18193185 -2884771 -2467791 -15897179 -28571999 -28164923 -455309376 -1754641 -21328656 -1357098067 -14216123 -14096763 -861619895485726722 -1534167900 -22522178 -71567590 -168679374 -16228398 -33216611 -332617373 -426159377 -60783724 -253167239 -553713584 -40129171 -714051110 -101002059 -5695632 -807095 diff --git a/tests/StockSight/YahooFinanceListenerTest.py b/tests/StockSight/YahooFinanceListenerTest.py new file mode 100644 index 0000000..77413b2 --- /dev/null +++ b/tests/StockSight/YahooFinanceListenerTest.py @@ -0,0 +1,53 @@ +import unittest +from StockSight.YahooFinanceListener import * +from StockSight.EsMap.Sentiment import mapping + +class 
YahooFinanceListenerTest(unittest.TestCase): + + symbol = 'tsla' + + def setUp(self): + config['redis']['db'] = 1 + self.mainClass = YahooFinanceListener(self.symbol); + + @classmethod + def setUpClass(cls): + cls.index_name = "stocksight_sentiment_test_"+cls.symbol; + es.indices.create(index=cls.index_name, body=mapping, ignore=[400, 404]) + + @classmethod + def tearDownClass(cls): + rds.flushdb() + es.indices.delete(index=cls.index_name,ignore=[400, 404]) + + def test_get_news_headlines(self): + headlines = self.mainClass.get_news_headlines() + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertIsInstance(headlines[0], Article, "Is not an Article") + self.assertIsNotNone(headlines[0].title, "Title is empty") + self.assertIsNotNone(headlines[0].url, "URL is empty") + self.assertIsNotNone(headlines[0].refer_url, "Refer URL is empty") + + def test_get_news_headlines_with_body(self): + config['news']['follow_link'] = True + headlines = self.mainClass.get_news_headlines() + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertIsInstance(headlines[0], Article, "Is not an Article") + self.assertIsNotNone(headlines[0].title, "Title is empty") + self.assertIsNotNone(headlines[0].url, "URL is empty") + self.assertNotEqual(headlines[0].url, '', "Refer URL is empty") + self.assertNotEqual(headlines[0].body, '', "Refer URL is empty") + + def test_execute(self): + self.mainClass.index_name = self.index_name + self.mainClass.execute() + logs = es.search(index=self.index_name,body="{}") + message = logs['hits']['hits'][0]['_source'] + self.assertIsNotNone(message['title'], "Title is empty") + self.assertIsNotNone(message['url'], "URL is empty") + self.assertNotEqual(message['referer_url'], '', "Refer URL is empty") + self.assertIsNotNone(message['sentiment'], "Sentiment is empty") + self.assertIsNotNone(message['polarity'], "Polarity is empty") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 
e86efe70e7f8c81508df02f81131bd70b8e4c16a Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 8 Sep 2019 21:29:58 -0400 Subject: [PATCH 28/55] Fix twitter --- src/StockSight/TweetListener.py | 16 +++++++++------- src/import.kibana.py | 3 ++- src/startup.sh | 2 +- src/tweet.sentiment.py | 13 +++++++++---- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index 173df39..a65f33a 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -17,7 +17,7 @@ import nltk -from definitions import * +from StockSight.Initializer.ConfigReader import * from StockSight.Initializer.ElasticSearch import es from StockSight.Initializer.Redis import rds from StockSight.Helper.Sentiment import * @@ -95,7 +95,7 @@ def on_data(self, data): # do some checks before adding to elasticsearch and crawling urls in tweet if friends == 0 or \ - followers < min_followers or \ + followers < config['twitter']['min_followers'] or \ statuses == 0 or \ text == "": logger.info("Tweet doesn't meet min requirements, not adding") @@ -107,16 +107,15 @@ def on_data(self, data): return True # check ignored tokens from config - for t in nltk_tokens_ignored: + for t in config['sentiment_analyzer']['ignore_words']: if t in tokens: logger.info("Tweet contains token from ignore list, not adding") return True # check required tokens from config tokenspass = False - for key, nltk_tokens_required_sublist in nltk_tokens_required.items(): - if(key == 'default'): continue + for key in config['tickers']: self.symbol = key - for t in nltk_tokens_required_sublist: + for t in config['tickers'][key]: if t in tokens: tokenspass = True break @@ -137,9 +136,10 @@ def on_data(self, data): # remove hashtags for elasticsearch #text_filtered = re.sub(r"[#|@|\$]\S+", "", text_filtered) + self.index_name = config['elasticsearch']['table_prefix']['sentiment']+self.symbol.lower() logger.info("Adding tweet to elasticsearch") # add 
twitter data and sentiment info to elasticsearch - es.index(index="stocksight_"+self.symbol+"_sentiment", + es.index(index=self.index_name, doc_type="_doc", body={ "_id": redis_id, @@ -199,6 +199,7 @@ def get_twitter_users_from_url(url): except requests.exceptions.RequestException as re: logger.warning("Requests exception: can't crawl web site caused by: %s" % re) pass + return twitter_users @@ -215,4 +216,5 @@ def get_twitter_users_from_file(file): except (IOError, OSError) as e: logger.warning("Exception: error opening file caused by: %s" % e) pass + return twitter_users \ No newline at end of file diff --git a/src/import.kibana.py b/src/import.kibana.py index 9e664e6..db5b0c0 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -22,6 +22,7 @@ for symbol in config['tickers']: try: + print("Imported %s Kibana Dashboard" % symbol) ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' ndjson_file = open(ndjson_file_path, "wt", encoding='utf-8') final_text = import_template.replace('tmpl',symbol) @@ -32,7 +33,7 @@ payload = { 'overwrite': 'false'} headers ={'kbn-xsrf': 'True'} post = requests.request('POST',kibana_import_url, headers=headers, files={'file': open(ndjson_file_path, "rt", encoding='utf-8')}) - print("Import %s Kibana Dashboard" % symbol) + print("Imported %s Kibana Dashboard" % symbol) print(ndjson_file_path) print(post.text) diff --git a/src/startup.sh b/src/startup.sh index 916f464..4afa5e3 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -36,7 +36,7 @@ done echo "Copy kibana dashboard if they don't exist"; -python import.kibana.py +python import.kibana.py & tick_time=900 tick=0 diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py index 75cec62..4689b76 100644 --- a/src/tweet.sentiment.py +++ b/src/tweet.sentiment.py @@ -15,6 +15,7 @@ import sys from random import randint +from StockSight.Initializer.ConfigReader import * from StockSight.TweetListener import * from StockSight.EsMap.Sentiment import * from tweepy import 
API, Stream, OAuthHandler, TweepError @@ -52,7 +53,11 @@ requestslogger.disabled = True - + consumer_key = config['twitter']['consumer_key'] + consumer_secret = config['twitter']['consumer_secret'] + access_token = config['twitter']['access_token'] + access_token_secret = config['twitter']['access_token_secret'] + twitter_feeds = config['twitter']['feeds'] #TODO exit if the twitter keys are empty if not consumer_key or \ @@ -64,7 +69,7 @@ try: - for symbol in symbols: + for symbol in config['twitter']: logger.info('Creating new Elasticsearch index or using existing ' + symbol) es.indices.create(index="stocksight_"+symbol+"_sentiment", body=mapping, ignore=[400, 404]) @@ -125,8 +130,8 @@ # search twitter for keywords - logger.info('NLTK tokens required: ' + str(nltk_tokens_required)) - logger.info('NLTK tokens ignored: ' + str(nltk_tokens_ignored)) + logger.info('NLTK tokens required: ' + str(config['tickers'])) + logger.info('NLTK tokens ignored: ' + str(config['sentiment_analyzer']['ignore_words'])) logger.info('Twitter Feeds: ' + str(twitter_feeds)) logger.info('Twitter User Ids: ' + str(useridlist)) logger.info('Listening for Tweets (ctrl-c to exit)...') From 5f1d87f156485ba7104bfe5ccdd67de197306b13 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 8 Sep 2019 21:53:19 -0400 Subject: [PATCH 29/55] Since it's single node insance, disable replica --- src/StockSight/EsMap/Sentiment.py | 6 ++++-- src/StockSight/EsMap/StockPrice.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/StockSight/EsMap/Sentiment.py b/src/StockSight/EsMap/Sentiment.py index c55f4eb..6dc701b 100644 --- a/src/StockSight/EsMap/Sentiment.py +++ b/src/StockSight/EsMap/Sentiment.py @@ -37,8 +37,10 @@ } } }, - "index" : { - "number_of_replicas" : "0" + "settings": { + "index": { + "number_of_replicas": "0" + } } } diff --git a/src/StockSight/EsMap/StockPrice.py b/src/StockSight/EsMap/StockPrice.py index 43408be..8a4fe57 100644 --- a/src/StockSight/EsMap/StockPrice.py +++ 
b/src/StockSight/EsMap/StockPrice.py @@ -25,7 +25,9 @@ } } }, - "index" : { - "number_of_replicas" : "0" + "settings": { + "index": { + "number_of_replicas": "0" + } } } \ No newline at end of file From 3e862ec20d52be5434bcf5024a0e2188a04fd2aa Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 8 Sep 2019 23:59:30 -0400 Subject: [PATCH 30/55] Refactors --- src/StockSight/EsMap/StockPrice.py | 2 +- src/StockSight/Helper/Sentiment.py | 14 +-- src/StockSight/Initializer/ConfigReader.py | 6 +- src/StockSight/Initializer/ElasticSearch.py | 2 +- src/StockSight/Initializer/Logger.py | 2 +- src/StockSight/Initializer/Redis.py | 2 +- src/StockSight/Model/Article.py | 2 +- src/StockSight/NewsHeadlineListener.py | 23 +++-- src/StockSight/SeekAlphaListener.py | 100 +++++++++++++------ src/StockSight/StockPriceListener.py | 26 +++-- src/StockSight/TweetListener.py | 24 ++--- src/StockSight/YahooFinanceListener.py | 19 ++-- src/delindex.py | 2 +- src/import.kibana.py | 11 +- src/news.sentiment.py | 10 +- src/stockprice.py | 5 +- src/tweet.sentiment.py | 13 +-- tests/StockSight/SeekAlphaListenerTest.py | 58 +++++++++++ tests/StockSight/YahooFinanceListenerTest.py | 17 ++-- 19 files changed, 221 insertions(+), 117 deletions(-) create mode 100644 tests/StockSight/SeekAlphaListenerTest.py diff --git a/src/StockSight/EsMap/StockPrice.py b/src/StockSight/EsMap/StockPrice.py index 8a4fe57..3f9a017 100644 --- a/src/StockSight/EsMap/StockPrice.py +++ b/src/StockSight/EsMap/StockPrice.py @@ -30,4 +30,4 @@ "number_of_replicas": "0" } } -} \ No newline at end of file +} diff --git a/src/StockSight/Helper/Sentiment.py b/src/StockSight/Helper/Sentiment.py index aa98e23..1b4ca6c 100644 --- a/src/StockSight/Helper/Sentiment.py +++ b/src/StockSight/Helper/Sentiment.py @@ -21,11 +21,12 @@ from StockSight.Initializer.Logger import * -def get_sentiment_from_url(text, sentimentURL): + +def get_sentiment_from_url(text, sentiment_url): payload = {'text': text} try: - post = requests.post(sentimentURL, 
data=payload) + post = requests.post(sentiment_url, data=payload) logger.debug(post.status_code) logger.debug(post.text) except requests.exceptions.RequestException as re: @@ -62,10 +63,11 @@ def sentiment_analysis(text): uses sentiment polarity from TextBlob, VADER Sentiment and sentiment from text-processing URL could be made better :) + :param text: """ - sentimentURL = 'http://text-processing.com/api/sentiment/' + sentiment_url = 'http://text-processing.com/api/sentiment/' # pass text into sentiment url - sentiment_url = get_sentiment_from_url(text, sentimentURL) + sentiment_url = get_sentiment_from_url(text, sentiment_url) # pass text into TextBlob text_tb = TextBlob(text) @@ -79,7 +81,7 @@ def sentiment_analysis(text): sentiment = "negative" # very negative elif text_tb.sentiment.polarity <= 0 and text_vs['compound'] <= -0.1: sentiment = "negative" # somewhat negative - elif text_tb.sentiment.polarity == 0 and text_vs['compound'] > -0.1 and text_vs['compound'] < 0.1: + elif text_tb.sentiment.polarity == 0 and -0.1 < text_vs['compound'] < 0.1: sentiment = "neutral" elif text_tb.sentiment.polarity >= 0 and text_vs['compound'] >= 0.1: sentiment = "positive" # somewhat positive @@ -111,4 +113,4 @@ def sentiment_analysis(text): print("Sentiment (url): " + str(sentiment_url)) print("Sentiment (algorithm): " + str(sentiment)) - return polarity, text_tb.sentiment.subjectivity, sentiment \ No newline at end of file + return polarity, text_tb.sentiment.subjectivity, sentiment diff --git a/src/StockSight/Initializer/ConfigReader.py b/src/StockSight/Initializer/ConfigReader.py index 46c8c0d..4b382fb 100644 --- a/src/StockSight/Initializer/ConfigReader.py +++ b/src/StockSight/Initializer/ConfigReader.py @@ -3,9 +3,9 @@ config_file = PROJECT_SRC_PATH+'/config.yml' -def load_config(config_file): - data = None - with open(config_file) as json_data_file: + +def load_config(yml_file): + with open(yml_file) as json_data_file: data = yaml.load(json_data_file, yaml.FullLoader) 
return data diff --git a/src/StockSight/Initializer/ElasticSearch.py b/src/StockSight/Initializer/ElasticSearch.py index 3360dcb..45b4f28 100644 --- a/src/StockSight/Initializer/ElasticSearch.py +++ b/src/StockSight/Initializer/ElasticSearch.py @@ -20,4 +20,4 @@ # create instance of elasticsearch es = Elasticsearch(hosts=[{'host': config['elasticsearch']['host'], 'port': config['elasticsearch']['port']}], - http_auth=(config['elasticsearch']['user'], config['elasticsearch']['password'])) \ No newline at end of file + http_auth=(config['elasticsearch']['user'], config['elasticsearch']['password'])) diff --git a/src/StockSight/Initializer/Logger.py b/src/StockSight/Initializer/Logger.py index 0f045d9..a8c2554 100644 --- a/src/StockSight/Initializer/Logger.py +++ b/src/StockSight/Initializer/Logger.py @@ -35,4 +35,4 @@ % logging.getLevelName(logging.DEBUG)) logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' loglevel = logging.INFO -logging.basicConfig(format=logformatter, level=loglevel) \ No newline at end of file +logging.basicConfig(format=logformatter, level=loglevel) diff --git a/src/StockSight/Initializer/Redis.py b/src/StockSight/Initializer/Redis.py index 993bf00..1211e9a 100644 --- a/src/StockSight/Initializer/Redis.py +++ b/src/StockSight/Initializer/Redis.py @@ -12,4 +12,4 @@ import redis from StockSight.Initializer.ConfigReader import config -rds = redis.Redis(host=str(config['redis']['host']), port=config['redis']['port'], db=config['redis']['db']) \ No newline at end of file +rds = redis.Redis(host=str(config['redis']['host']), port=config['redis']['port'], db=config['redis']['db']) diff --git a/src/StockSight/Model/Article.py b/src/StockSight/Model/Article.py index 317ea09..a58afec 100644 --- a/src/StockSight/Model/Article.py +++ b/src/StockSight/Model/Article.py @@ -1,6 +1,6 @@ class Article: - def __init__(self, title, url, body = '', referer_url = ''): + def __init__(self, title, url, body='', referer_url=''): self.title = title 
self.body = body self.url = url diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index cde7344..d1dbdab 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -30,12 +30,11 @@ from StockSight.Model.Article import * - class NewsHeadlineListener(ABC): - def __init__(self, type, symbol, url=None): + def __init__(self, news_type, symbol, url=None): self.symbol = symbol self.url = url - self.type = type + self.type = news_type self.cache_length = 2628000 self.index_name = config['elasticsearch']['table_prefix']['sentiment']+self.symbol.lower() @@ -67,15 +66,13 @@ def execute(self): for t in config['sentiment_analyzer']['ignore_words']: if t in tokens: logger.info("Text contains token from ignore list, not adding") - rds.set(md5_hash,1,self.cache_length) + rds.set(md5_hash, 1, self.cache_length) continue - nltk_tokens = [] if self.symbol in config['tickers']: nltk_tokens = config['tickers'][self.symbol] - # check required tokens from config tokenspass = False for t in nltk_tokens: @@ -85,7 +82,7 @@ def execute(self): if not tokenspass: logger.info("Text does not contain token from required list, not adding") - rds.set(md5_hash,1,self.cache_length) + rds.set(md5_hash, 1, self.cache_length) continue # get sentiment values @@ -98,7 +95,7 @@ def execute(self): body={ "msg_id": md5_hash, "date": datenow, - "referer_url": article_obj.refer_url, + "referer_url": article_obj.referer_url, "url": article_obj.url, "title": article_obj.title, "message": article_obj.body, @@ -107,7 +104,7 @@ def execute(self): "sentiment": sentiment }) - rds.set(md5_hash,1,self.cache_length) + rds.set(md5_hash, 1, self.cache_length) logger.info("Scraping news for %s from %s... 
Done" % (self.symbol, self.type)) @@ -116,7 +113,13 @@ def get_news_headlines(self): pass @abstractmethod - def get_page_text(url): + def get_page_text(self, url): pass + def get_article_with_atag(self, raw_article, parsed_uri): + a_tag = raw_article.find('a') + url_link = a_tag.get('href') + if url_link.find('http') != -1: + return None + return Article(a_tag.text, parsed_uri+url_link) diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index 9a07271..d561388 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -2,50 +2,94 @@ class SeekAlphaListener(NewsHeadlineListener): - def __init__(self,symbol): - super.__init__(symbol,"https://finance.yahoo.com/quote/%s/?p=%s" % (symbol, symbol)) + def __init__(self, symbol): + super(SeekAlphaListener, self)\ + .__init__("Seek Alpha", symbol, "https://seekingalpha.com/symbol/%s" % symbol) def get_news_headlines(self): - latestheadlines = [] - latestheadlines_links = [] + articles = [] parsed_uri = urlparse.urljoin(self.url, '/') try: - req = requests.get(self.url) html = req.text soup = BeautifulSoup(html, 'html.parser') - html = soup.findAll('h3') - links = soup.findAll('a') + analysis = soup.select('div.analysis div.symbol_article') + news = soup.select('div.news div.symbol_article') + + if analysis: + for rawArticle in analysis: + + article = self.get_article_with_atag(rawArticle, parsed_uri) + if article is None: + continue + + if config['news']['follow_link']: + analysis_url = parsed_uri + article.url + for p in self.get_analysis_summary(analysis_url): + article.body += str(p)+" " + + article.referer_url = self.url + articles.append(article) + + if news: + for rawArticle in news: + + article = self.get_article_with_atag(rawArticle, parsed_uri) + if article is None: + continue + + if config['news']['follow_link']: + news_url = parsed_uri + article.url + for p in self.get_news_summary(news_url): + article.body += str(p)+" " + + article.referer_url 
= self.url + articles.append(article) - if html: - for i in html: - latestheadlines.append((str(i.next.next.next.next), self.url)) - logger.debug(latestheadlines) + except requests.exceptions.RequestException as exce: + logger.warning("Exception: can't crawl web site (%s)" % exce) + pass - if config['news']['follow_link']: - if links: - for i in links: - if '/news/' in i['href']: - l = parsed_uri.rstrip('/') + i['href'] - latestheadlines_links.append(l) + return articles - logger.debug(latestheadlines_links) + def get_page_text(self, url): + pass - logger.info("Following any new links and grabbing text from page...") + def get_news_summary(self, url): + try: + req = requests.get(url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html_p = soup.select('p.bullets_li') - for linkurl in latestheadlines_links: - for p in self.get_page_text(linkurl): - latestheadlines.append((str(p), linkurl)) - logger.debug(latestheadlines) + if html_p: + for i in html_p: + if i.string is not None: + yield i.string + else: + break - except requests.exceptions.RequestException as re: - logger.warning("Exception: can't crawl web site (%s)" % re) + except requests.exceptions.RequestException as exce: + logger.warning("Exception: can't crawl web site (%s)" % exce) pass - return latestheadlines + def get_analysis_summary(self, url): + try: + req = requests.get(url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + html_p = soup.select('div.a-sum p') - def get_page_text(self): - pass \ No newline at end of file + if html_p: + for i in html_p: + if i.string is not None: + yield i.string + else: + break + + except requests.exceptions.RequestException as exce: + logger.warning("Exception: can't crawl web site (%s)" % exce) + pass diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index 2601289..9b1e3f2 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -22,13 +22,14 @@ regex = re + 
class StockPriceListener: def __init__(self): self.index_name = None def get_price(self, symbol): - logger.info("Scraping price for %s from Yahoo Finance ..." % (symbol)) + logger.info("Scraping price for %s from Yahoo Finance ..." % symbol) if self.index_name is None: self.index_name = config['elasticsearch']['table_prefix']['price']+symbol.lower() @@ -37,10 +38,10 @@ def get_price(self, symbol): current_timezone = timezone(config['stock_price']['timezone_str']) - if config['stock_price']['time_check'] and self.isNotLive(current_timezone): + if config['stock_price']['time_check'] and self.is_not_live(current_timezone): today = datetime.datetime.now(current_timezone) logger.info("Stock market is not live. Current time: %s" % today.strftime("%Y-%m-%d %H:%M")) - return self; + return self logger.info("Grabbing stock data for symbol %s..." % symbol) @@ -106,17 +107,14 @@ def get_price(self, symbol): logger.error("Exception: can't get stock data, trying again later, reason is %s" % e) pass - logger.info("Scraping price for %s from Yahoo Finance... Done" % (symbol)) - - return self; + logger.info("Scraping price for %s from Yahoo Finance... 
Done" % symbol) + return self - def isNotLive(self, timezone): - today = datetime.datetime.now(timezone); - if today.weekday() >= config['stock_price']['weekday_start'] and \ - today.weekday() <= config['stock_price']['weekday_end'] and \ - today.hour >= config['stock_price']['hour_start'] and \ - today.hour <= config['stock_price']['hour_end']: - return False; + def is_not_live(self, current_timezone): + today = datetime.datetime.now(current_timezone) + if config['stock_price']['weekday_start'] <= today.weekday() <= config['stock_price']['weekday_end'] and \ + config['stock_price']['hour_start'] <= today.hour <= config['stock_price']['hour_end']: + return False - return True; + return True diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index a65f33a..e6f410f 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -40,7 +40,7 @@ def on_data(self, data): logger.debug(dict_data) # clean up tweet text - #text = unicodedata.normalize( + # text = unicodedata.normalize( # 'NFKD', dict_data["text"]).encode('ascii', 'ignore') text = dict_data["text"] if text is None: @@ -48,7 +48,7 @@ def on_data(self, data): return True # grab html links from tweet - #tweet_urls = re.search("http\S+", text) + # tweet_urls = re.search("http\S+", text) # clean up tweet text more text = text.replace("\n", " ") @@ -95,13 +95,13 @@ def on_data(self, data): # do some checks before adding to elasticsearch and crawling urls in tweet if friends == 0 or \ - followers < config['twitter']['min_followers'] or \ - statuses == 0 or \ - text == "": + followers < config['twitter']['min_followers'] or \ + statuses == 0 or \ + text == "": logger.info("Tweet doesn't meet min requirements, not adding") return True - redis_id = 'tweet'+str(tweetid); + redis_id = 'tweet'+str(tweetid) if rds.exists(redis_id): logger.info("Tweet already exists") return True @@ -134,7 +134,7 @@ def on_data(self, data): polarity, subjectivity, sentiment = 
sentiment_analysis(tweet) # remove hashtags for elasticsearch - #text_filtered = re.sub(r"[#|@|\$]\S+", "", text_filtered) + # text_filtered = re.sub(r"[#|@|\$]\S+", "", text_filtered) self.index_name = config['elasticsearch']['table_prefix']['sentiment']+self.symbol.lower() logger.info("Adding tweet to elasticsearch") @@ -154,7 +154,7 @@ def on_data(self, data): }) # add tweet_id to cache - rds.set(redis_id,1,86400) + rds.set(redis_id, 1, 86400) return True @@ -172,8 +172,6 @@ def on_timeout(self): logger.warning("Timeout...") return True - - def get_twitter_users_from_url(url): twitter_users = [] logger.info("Grabbing any twitter users from url %s" % url) @@ -196,8 +194,8 @@ def get_twitter_users_from_url(url): if parsed_uri in twitter_urls and "=" not in link and "?" not in link: user = link.split('/')[3] twitter_users.append(u'@' + user) - except requests.exceptions.RequestException as re: - logger.warning("Requests exception: can't crawl web site caused by: %s" % re) + except requests.exceptions.RequestException as exce: + logger.warning("Requests exception: can't crawl web site caused by: %s" % exce) pass return twitter_users @@ -217,4 +215,4 @@ def get_twitter_users_from_file(file): logger.warning("Exception: error opening file caused by: %s" % e) pass - return twitter_users \ No newline at end of file + return twitter_users diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py index 458ab3b..7dc3fa8 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -2,7 +2,7 @@ class YahooFinanceListener(NewsHeadlineListener): - def __init__(self,symbol): + def __init__(self, symbol): super(YahooFinanceListener, self)\ .__init__("Yahoo Finance", symbol, "https://finance.yahoo.com/quote/%s/?p=%s" % (symbol, symbol)) @@ -22,19 +22,20 @@ def get_news_headlines(self): if html: for rawArticle in html: - aTag = rawArticle.find('a') - article = Article(aTag.text, aTag.get('href')) + article = 
self.get_article_with_atag(rawArticle, parsed_uri) + if article is None: + continue if config['news']['follow_link']: - new_url = parsed_uri + article.url - for p in self.get_page_text(new_url): - article.body += str(p) + news_url = parsed_uri + article.url + for p in self.get_page_text(news_url): + article.body += str(p)+" " - article.refer_url = self.url + article.referer_url = self.url articles.append(article) - except requests.exceptions.RequestException as re: - logger.warning("Exception: can't crawl web site (%s)" % re) + except requests.exceptions.RequestException as exce: + logger.warning("Exception: can't crawl web site (%s)" % exce) pass return articles diff --git a/src/delindex.py b/src/delindex.py index 12ae7d2..d5ad4e7 100644 --- a/src/delindex.py +++ b/src/delindex.py @@ -19,7 +19,7 @@ parser = argparse.ArgumentParser() parser.add_argument("-d", "--delindex", action="store_true", - help="Delete existing Elasticsearch index") + help="Delete existing Elasticsearch index") args = parser.parse_args() eslogger = logging.getLogger('elasticsearch') diff --git a/src/import.kibana.py b/src/import.kibana.py index db5b0c0..0477181 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -25,20 +25,21 @@ print("Imported %s Kibana Dashboard" % symbol) ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' ndjson_file = open(ndjson_file_path, "wt", encoding='utf-8') - final_text = import_template.replace('tmpl',symbol) + final_text = import_template.replace('tmpl', symbol) ndjson_file.write(final_text) ndjson_file.close() kibana_import_url = 'http://kibana:5601/api/saved_objects/_import' - payload = { 'overwrite': 'false'} - headers ={'kbn-xsrf': 'True'} - post = requests.request('POST',kibana_import_url, headers=headers, files={'file': open(ndjson_file_path, "rt", encoding='utf-8')}) + payload = {'overwrite': 'false'} + headers = {'kbn-xsrf': 'True'} + post = requests.request('POST', kibana_import_url, headers=headers, files={'file': 
open(ndjson_file_path, "rt", encoding='utf-8')}) print("Imported %s Kibana Dashboard" % symbol) print(ndjson_file_path) print(post.text) except Exception as e: - print(e); + print(e) + print("Please run this script manually once kibana is ready.") pass except KeyboardInterrupt: diff --git a/src/news.sentiment.py b/src/news.sentiment.py index 8f08a5c..d006769 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -30,7 +30,7 @@ # parse cli args parser = argparse.ArgumentParser() parser.add_argument("-v", "--verbose", action="store_true", - help="Increase output verbosity") + help="Increase output verbosity") parser.add_argument("--debug", action="store_true", help="Debug message output") parser.add_argument("-q", "--quiet", action="store_true", @@ -66,11 +66,15 @@ yahooThread = threading.Thread(target=yahooListener.execute) yahooThread.start() - time.sleep(randint(5,15)) + seekAlphaListener = SeekAlphaListener(symbol) + seekAlphaThread = threading.Thread(target=seekAlphaListener.execute) + seekAlphaThread.start() + + time.sleep(randint(5, 10)) except Exception as e: logger.warning("%s" % e) pass except KeyboardInterrupt: print("Ctrl-c keyboard interrupt, exiting...") - sys.exit(0) \ No newline at end of file + sys.exit(0) diff --git a/src/stockprice.py b/src/stockprice.py index 0620f4a..10d5c4d 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -27,8 +27,6 @@ __version__ = STOCKSIGHT_VERSION - - if __name__ == '__main__': # parse cli args @@ -84,7 +82,8 @@ for symbol in config['tickers']: try: logger.info('Creating new Price index or using existing ' + symbol) - es.indices.create(index=config['elasticsearch']['table_prefix']['sentiment']+symbol.lower(), body=mapping, ignore=[400, 404]) + es.indices.create(index=config['elasticsearch']['table_prefix']['sentiment']+symbol.lower(), + body=mapping, ignore=[400, 404]) stockprice = StockPriceListener() diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py index 4689b76..207a5f8 100644 --- 
a/src/tweet.sentiment.py +++ b/src/tweet.sentiment.py @@ -29,7 +29,7 @@ # parse cli args parser = argparse.ArgumentParser() parser.add_argument("-v", "--verbose", action="store_true", - help="Increase output verbosity") + help="Increase output verbosity") parser.add_argument("--debug", action="store_true", help="Debug message output") parser.add_argument("-q", "--quiet", action="store_true", @@ -52,14 +52,12 @@ eslogger.disabled = True requestslogger.disabled = True - consumer_key = config['twitter']['consumer_key'] consumer_secret = config['twitter']['consumer_secret'] access_token = config['twitter']['access_token'] access_token_secret = config['twitter']['access_token_secret'] twitter_feeds = config['twitter']['feeds'] - #TODO exit if the twitter keys are empty if not consumer_key or \ not consumer_secret or \ not access_token or \ @@ -67,7 +65,6 @@ logger.warning("Invalid Twitter API cred") sys.exit(1) - try: for symbol in config['twitter']: logger.info('Creating new Elasticsearch index or using existing ' + symbol) @@ -104,7 +101,7 @@ # sleep a bit in case twitter suspends us logger.warning("Tweepy exception: twitter api error caused by: %s" % te) logger.info("Sleeping for a random amount of time and retrying...") - time.sleep(randint(1,10)) + time.sleep(randint(1, 10)) continue except KeyboardInterrupt: logger.info("Ctrl-c keyboard interrupt, exiting...") @@ -128,7 +125,6 @@ except Exception as e: raise - # search twitter for keywords logger.info('NLTK tokens required: ' + str(config['tickers'])) logger.info('NLTK tokens ignored: ' + str(config['sentiment_analyzer']['ignore_words'])) @@ -143,11 +139,6 @@ print("Ctrl-c keyboard interrupt, exiting...") stream.disconnect() sys.exit(0) - except Exception as e: logger.warning("%s" % e) pass - - except KeyboardInterrupt: - print("Ctrl-c keyboard interrupt, exiting...") - sys.exit(0) \ No newline at end of file diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py new 
file mode 100644 index 0000000..7c824e8 --- /dev/null +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -0,0 +1,58 @@ +import unittest +from StockSight.SeekAlphaListener import * +from StockSight.EsMap.Sentiment import mapping + + +class SeekAlphaListenerTest(unittest.TestCase): + + symbol = 'tsla' + + def setUp(self): + config['redis']['db'] = 1 + self.mainClass = SeekAlphaListener(self.symbol) + + @classmethod + def setUpClass(cls): + cls.index_name = "stocksight_sentiment_test_"+cls.symbol + es.indices.create(index=cls.index_name, body=mapping, ignore=[400, 404]) + + @classmethod + def tearDownClass(cls): + rds.flushdb() + es.indices.delete(index=cls.index_name, ignore=[400, 404]) + + def test_get_news_headlines(self): + headlines = self.mainClass.get_news_headlines() + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertIsInstance(headlines[0], Article, "Is not an Article") + self.assertIsNotNone(headlines[0].title, "Title is empty") + self.assertIsNotNone(headlines[0].url, "URL is empty") + self.assertIsNotNone(headlines[0].referer_url, "Refer URL is empty") + + def test_get_news_headlines_with_body(self): + config['news']['follow_link'] = True + headlines = self.mainClass.get_news_headlines() + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertIsInstance(headlines[0], Article, "Is not an Article") + self.assertIsNotNone(headlines[0].title, "Title is empty") + self.assertIsNotNone(headlines[0].url, "URL is empty") + self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") + for message in headlines: + if message.body == '': continue; + else: + self.assertNotEqual(headlines[0].body, '', "Body is empty") + break + + def test_execute(self): + self.mainClass.index_name = self.index_name + self.mainClass.execute() + logs = es.search(index=self.index_name,body="{}") + message = logs['hits']['hits'][0]['_source'] + self.assertIsNotNone(message['title'], "Title is empty") + 
self.assertIsNotNone(message['url'], "URL is empty") + self.assertNotEqual(message['referer_url'], '', "Refer URL is empty") + self.assertIsNotNone(message['sentiment'], "Sentiment is empty") + self.assertIsNotNone(message['polarity'], "Polarity is empty") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/StockSight/YahooFinanceListenerTest.py b/tests/StockSight/YahooFinanceListenerTest.py index 77413b2..65ee674 100644 --- a/tests/StockSight/YahooFinanceListenerTest.py +++ b/tests/StockSight/YahooFinanceListenerTest.py @@ -2,23 +2,24 @@ from StockSight.YahooFinanceListener import * from StockSight.EsMap.Sentiment import mapping + class YahooFinanceListenerTest(unittest.TestCase): symbol = 'tsla' def setUp(self): config['redis']['db'] = 1 - self.mainClass = YahooFinanceListener(self.symbol); + self.mainClass = YahooFinanceListener(self.symbol) @classmethod def setUpClass(cls): - cls.index_name = "stocksight_sentiment_test_"+cls.symbol; + cls.index_name = "stocksight_sentiment_test_"+cls.symbol es.indices.create(index=cls.index_name, body=mapping, ignore=[400, 404]) @classmethod def tearDownClass(cls): rds.flushdb() - es.indices.delete(index=cls.index_name,ignore=[400, 404]) + es.indices.delete(index=cls.index_name, ignore=[400, 404]) def test_get_news_headlines(self): headlines = self.mainClass.get_news_headlines() @@ -26,7 +27,7 @@ def test_get_news_headlines(self): self.assertIsInstance(headlines[0], Article, "Is not an Article") self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") - self.assertIsNotNone(headlines[0].refer_url, "Refer URL is empty") + self.assertIsNotNone(headlines[0].referer_url, "Refer URL is empty") def test_get_news_headlines_with_body(self): config['news']['follow_link'] = True @@ -35,8 +36,12 @@ def test_get_news_headlines_with_body(self): self.assertIsInstance(headlines[0], Article, "Is not an Article") 
self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") - self.assertNotEqual(headlines[0].url, '', "Refer URL is empty") - self.assertNotEqual(headlines[0].body, '', "Refer URL is empty") + self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") + for message in headlines: + if message.body == '': continue; + else: + self.assertNotEqual(headlines[0].body, '', "Body is empty") + break def test_execute(self): self.mainClass.index_name = self.index_name From 84c6324f177d80c70cd0d2a754d631cdd019da2f Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 9 Sep 2019 00:11:34 -0400 Subject: [PATCH 31/55] Minor Import script adjustment --- src/import.kibana.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/import.kibana.py b/src/import.kibana.py index 0477181..92afc62 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -22,7 +22,7 @@ for symbol in config['tickers']: try: - print("Imported %s Kibana Dashboard" % symbol) + print("Starting %s Kibana Dashboard Import" % symbol) ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' ndjson_file = open(ndjson_file_path, "wt", encoding='utf-8') final_text = import_template.replace('tmpl', symbol) From 4cd6af4cea4dcb621cd8914dae4fc8bdb4d2c3b6 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 9 Sep 2019 01:59:24 -0400 Subject: [PATCH 32/55] Index structure change --- src/StockSight/EsMap/Sentiment.py | 4 ++-- src/StockSight/TweetListener.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/StockSight/EsMap/Sentiment.py b/src/StockSight/EsMap/Sentiment.py index 6dc701b..afe5e17 100644 --- a/src/StockSight/EsMap/Sentiment.py +++ b/src/StockSight/EsMap/Sentiment.py @@ -9,7 +9,7 @@ "type": "keyword", }, "url": { - "type": "keyword", + "type": "text", }, "location": { "type": "keyword", @@ -24,7 +24,7 @@ "type": "text", }, "msg_id": { - "type": "text" + "type": "keyword" }, "polarity": { "type": "float" 
diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index e6f410f..eb55811 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -146,8 +146,8 @@ def on_data(self, data): "author": screen_name, "location": location, "date": created_date, - "title": '', - "message": text_filtered, + "title": text_filtered, + "message": '', "polarity": polarity, "subjectivity": subjectivity, "sentiment": sentiment From 622eae131430ac146b8cd1cba5c54013ddc327b2 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 9 Sep 2019 02:40:06 -0400 Subject: [PATCH 33/55] Fix message body --- src/StockSight/NewsHeadlineListener.py | 12 ++++++++++-- src/StockSight/SeekAlphaListener.py | 10 +++++----- src/StockSight/YahooFinanceListener.py | 4 ++-- tests/StockSight/SeekAlphaListenerTest.py | 9 +++++++-- tests/StockSight/YahooFinanceListenerTest.py | 8 ++++++-- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index d1dbdab..67ed3c2 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -119,7 +119,15 @@ def get_page_text(self, url): def get_article_with_atag(self, raw_article, parsed_uri): a_tag = raw_article.find('a') url_link = a_tag.get('href') - if url_link.find('http') != -1: + #ignore 3rd party links + if url_link.find('http') != -1 and url_link.find(parsed_uri) == -1 : return None - return Article(a_tag.text, parsed_uri+url_link) + return Article(a_tag.text, url_link) + + def get_proper_new_body_url(self, article_url, host): + if article_url.find('http') != -1: + news_url = article_url + else: + news_url = host[0:-1] + article_url + return news_url diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index d561388..acf90fd 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -27,8 +27,8 @@ def get_news_headlines(self): 
continue if config['news']['follow_link']: - analysis_url = parsed_uri + article.url - for p in self.get_analysis_summary(analysis_url): + body_url = self.get_proper_new_body_url(article.url, parsed_uri) + for p in self.get_analysis_summary(body_url): article.body += str(p)+" " article.referer_url = self.url @@ -42,8 +42,8 @@ def get_news_headlines(self): continue if config['news']['follow_link']: - news_url = parsed_uri + article.url - for p in self.get_news_summary(news_url): + body_url = self.get_proper_new_body_url(article.url, parsed_uri) + for p in self.get_news_summary(body_url): article.body += str(p)+" " article.referer_url = self.url @@ -78,7 +78,7 @@ def get_news_summary(self, url): def get_analysis_summary(self, url): try: - req = requests.get(url) + req = requests.get(str(url)) html = req.text soup = BeautifulSoup(html, 'html.parser') html_p = soup.select('div.a-sum p') diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py index 7dc3fa8..7ab6aba 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -27,8 +27,8 @@ def get_news_headlines(self): continue if config['news']['follow_link']: - news_url = parsed_uri + article.url - for p in self.get_page_text(news_url): + body_url = self.get_proper_new_body_url(article.url, parsed_uri) + for p in self.get_page_text(body_url): article.body += str(p)+" " article.referer_url = self.url diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 7c824e8..683a454 100644 --- a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -37,12 +37,17 @@ def test_get_news_headlines_with_body(self): self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") + hasBody = False for message in headlines: - if message.body == '': continue; 
+ if message.body == '': + continue else: - self.assertNotEqual(headlines[0].body, '', "Body is empty") + hasBody = True break + self.assertEqual(hasBody, True, "Body is empty") + + def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() diff --git a/tests/StockSight/YahooFinanceListenerTest.py b/tests/StockSight/YahooFinanceListenerTest.py index 65ee674..9c4f4ce 100644 --- a/tests/StockSight/YahooFinanceListenerTest.py +++ b/tests/StockSight/YahooFinanceListenerTest.py @@ -37,12 +37,16 @@ def test_get_news_headlines_with_body(self): self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") + hasBody = False for message in headlines: - if message.body == '': continue; + if message.body == '': + continue else: - self.assertNotEqual(headlines[0].body, '', "Body is empty") + hasBody = True break + self.assertEqual(hasBody, True, "Body is empty") + def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() From baa9d5ff3fd70c1958affa045a5a4726379eb605 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 9 Sep 2019 22:27:34 -0400 Subject: [PATCH 34/55] Optimiaztion --- src/StockSight/Model/Article.py | 5 ++- src/StockSight/NewsHeadlineListener.py | 14 ++++---- src/StockSight/SeekAlphaListener.py | 36 ++++++++------------ src/StockSight/YahooFinanceListener.py | 18 +++++----- src/config.sample.yml | 6 ++-- tests/StockSight/SeekAlphaListenerTest.py | 7 ++-- tests/StockSight/StockPriceListenerTest.py | 29 ++++++++++++++++ tests/StockSight/YahooFinanceListenerTest.py | 12 +++---- 8 files changed, 76 insertions(+), 51 deletions(-) create mode 100644 tests/StockSight/StockPriceListenerTest.py diff --git a/src/StockSight/Model/Article.py b/src/StockSight/Model/Article.py index a58afec..fd59b47 100644 --- a/src/StockSight/Model/Article.py +++ b/src/StockSight/Model/Article.py @@ -1,3 
+1,5 @@ +import hashlib + class Article: def __init__(self, title, url, body='', referer_url=''): @@ -5,6 +7,7 @@ def __init__(self, title, url, body='', referer_url=''): self.body = body self.url = url self.referer_url = referer_url + self.msg_id = hashlib.md5((self.title + self.url).encode()).hexdigest() def __eq__(self, other): - return self.url == other.url and self.title == other.title + return self.msg_id and other.msg_id diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 67ed3c2..e695c2a 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -45,9 +45,7 @@ def execute(self): # add any new headlines for article_obj in articles: - md5_hash = hashlib.md5((article_obj.title + article_obj.url).encode()).hexdigest() - - if rds.exists(md5_hash) is 0: + if rds.exists(article_obj.msg_id) is 0: datenow = datetime.utcnow().isoformat() # output news data @@ -66,7 +64,7 @@ def execute(self): for t in config['sentiment_analyzer']['ignore_words']: if t in tokens: logger.info("Text contains token from ignore list, not adding") - rds.set(md5_hash, 1, self.cache_length) + rds.set(article_obj.msg_id, 1, self.cache_length) continue nltk_tokens = [] @@ -82,7 +80,7 @@ def execute(self): if not tokenspass: logger.info("Text does not contain token from required list, not adding") - rds.set(md5_hash, 1, self.cache_length) + rds.set(article_obj.msg_id, 1, self.cache_length) continue # get sentiment values @@ -93,7 +91,7 @@ def execute(self): es.index(index=self.index_name, doc_type="_doc", body={ - "msg_id": md5_hash, + "msg_id": article_obj.msg_id, "date": datenow, "referer_url": article_obj.referer_url, "url": article_obj.url, @@ -104,7 +102,7 @@ def execute(self): "sentiment": sentiment }) - rds.set(md5_hash, 1, self.cache_length) + rds.set(article_obj.msg_id, 1, self.cache_length) logger.info("Scraping news for %s from %s... 
Done" % (self.symbol, self.type)) @@ -131,3 +129,5 @@ def get_proper_new_body_url(self, article_url, host): news_url = host[0:-1] + article_url return news_url + def can_process(self, article): + return article is not None and rds.exists(article.msg_id) is 0 diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index acf90fd..7374f30 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -21,33 +21,27 @@ def get_news_headlines(self): if analysis: for rawArticle in analysis: - article = self.get_article_with_atag(rawArticle, parsed_uri) - if article is None: - continue - - if config['news']['follow_link']: - body_url = self.get_proper_new_body_url(article.url, parsed_uri) - for p in self.get_analysis_summary(body_url): - article.body += str(p)+" " + if self.can_process(article): + if config['news']['follow_link']: + body_url = self.get_proper_new_body_url(article.url, parsed_uri) + for p in self.get_analysis_summary(body_url): + article.body += str(p)+" " - article.referer_url = self.url - articles.append(article) + article.referer_url = self.url + articles.append(article) if news: for rawArticle in news: - article = self.get_article_with_atag(rawArticle, parsed_uri) - if article is None: - continue - - if config['news']['follow_link']: - body_url = self.get_proper_new_body_url(article.url, parsed_uri) - for p in self.get_news_summary(body_url): - article.body += str(p)+" " - - article.referer_url = self.url - articles.append(article) + if self.can_process(article): + if config['news']['follow_link']: + body_url = self.get_proper_new_body_url(article.url, parsed_uri) + for p in self.get_news_summary(body_url): + article.body += str(p)+" " + + article.referer_url = self.url + articles.append(article) except requests.exceptions.RequestException as exce: logger.warning("Exception: can't crawl web site (%s)" % exce) diff --git a/src/StockSight/YahooFinanceListener.py 
b/src/StockSight/YahooFinanceListener.py index 7ab6aba..4c223bd 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -23,16 +23,14 @@ def get_news_headlines(self): for rawArticle in html: article = self.get_article_with_atag(rawArticle, parsed_uri) - if article is None: - continue - - if config['news']['follow_link']: - body_url = self.get_proper_new_body_url(article.url, parsed_uri) - for p in self.get_page_text(body_url): - article.body += str(p)+" " - - article.referer_url = self.url - articles.append(article) + if self.can_process(article): + if config['news']['follow_link']: + body_url = self.get_proper_new_body_url(article.url, parsed_uri) + for p in self.get_page_text(body_url): + article.body += str(p)+" " + + article.referer_url = self.url + articles.append(article) except requests.exceptions.RequestException as exce: logger.warning("Exception: can't crawl web site (%s)" % exce) diff --git a/src/config.sample.yml b/src/config.sample.yml index 7597ecd..a73b44a 100644 --- a/src/config.sample.yml +++ b/src/config.sample.yml @@ -40,10 +40,10 @@ twitter: stock_price: time_check : true #True, fetch the price only within the below time range - weekday_start : 1 - weekday_end : 5 + weekday_start : 0 + weekday_end : 4 hour_start : 9 - hour_end : 18 + hour_end : 17 timezone_str : America/Toronto tickers: diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 683a454..33c42ce 100644 --- a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -11,6 +11,9 @@ def setUp(self): config['redis']['db'] = 1 self.mainClass = SeekAlphaListener(self.symbol) + def tearDown(self): + rds.flushdb() + @classmethod def setUpClass(cls): cls.index_name = "stocksight_sentiment_test_"+cls.symbol @@ -23,7 +26,7 @@ def tearDownClass(cls): def test_get_news_headlines(self): headlines = self.mainClass.get_news_headlines() - 
self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline / Page returned 403") self.assertIsInstance(headlines[0], Article, "Is not an Article") self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") @@ -32,7 +35,7 @@ def test_get_news_headlines(self): def test_get_news_headlines_with_body(self): config['news']['follow_link'] = True headlines = self.mainClass.get_news_headlines() - self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline / Page returned 403") self.assertIsInstance(headlines[0], Article, "Is not an Article") self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") diff --git a/tests/StockSight/StockPriceListenerTest.py b/tests/StockSight/StockPriceListenerTest.py new file mode 100644 index 0000000..5772bd5 --- /dev/null +++ b/tests/StockSight/StockPriceListenerTest.py @@ -0,0 +1,29 @@ +import unittest +from StockSight.StockPriceListener import * +from StockSight.EsMap.StockPrice import mapping + + +class StockPriceListenerTest(unittest.TestCase): + + symbol = 'tsla' + + def setUp(self): + config['redis']['db'] = 1 + self.mainClass = StockPriceListener() + + @classmethod + def setUpClass(cls): + cls.index_name = "stocksight_price_test_"+cls.symbol + es.indices.create(index=cls.index_name, body=mapping, ignore=[400, 404]) + + @classmethod + def tearDownClass(cls): + es.indices.delete(index=cls.index_name, ignore=[400, 404]) + + def test_get_price(self): + config['stock_price']['time_check'] = False + self.mainClass.index_name = self.index_name + self.mainClass.get_price(self.symbol) + logs = es.search(index=self.index_name,body="{}") + message = logs['hits']['hits'][0]['_source'] + self.assertIsNotNone(message['price_last'], "Price is empty") \ No newline at end of file diff --git 
a/tests/StockSight/YahooFinanceListenerTest.py b/tests/StockSight/YahooFinanceListenerTest.py index 9c4f4ce..e39917f 100644 --- a/tests/StockSight/YahooFinanceListenerTest.py +++ b/tests/StockSight/YahooFinanceListenerTest.py @@ -11,19 +11,17 @@ def setUp(self): config['redis']['db'] = 1 self.mainClass = YahooFinanceListener(self.symbol) + def tearDown(self): + rds.flushdb() + @classmethod def setUpClass(cls): cls.index_name = "stocksight_sentiment_test_"+cls.symbol es.indices.create(index=cls.index_name, body=mapping, ignore=[400, 404]) - @classmethod - def tearDownClass(cls): - rds.flushdb() - es.indices.delete(index=cls.index_name, ignore=[400, 404]) - def test_get_news_headlines(self): headlines = self.mainClass.get_news_headlines() - self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline / Page returned 403") self.assertIsInstance(headlines[0], Article, "Is not an Article") self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") @@ -32,7 +30,7 @@ def test_get_news_headlines(self): def test_get_news_headlines_with_body(self): config['news']['follow_link'] = True headlines = self.mainClass.get_news_headlines() - self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline") + self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline / Page returned 403") self.assertIsInstance(headlines[0], Article, "Is not an Article") self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") From 040887bc15ec3649370cc5a007a9970960119e51 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 9 Sep 2019 22:31:27 -0400 Subject: [PATCH 35/55] Add delay before fetching from elasticsearch . 
--- tests/StockSight/SeekAlphaListenerTest.py | 2 ++ tests/StockSight/StockPriceListenerTest.py | 2 ++ tests/StockSight/YahooFinanceListenerTest.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 33c42ce..9dc1798 100644 --- a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -1,6 +1,7 @@ import unittest from StockSight.SeekAlphaListener import * from StockSight.EsMap.Sentiment import mapping +import time class SeekAlphaListenerTest(unittest.TestCase): @@ -54,6 +55,7 @@ def test_get_news_headlines_with_body(self): def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() + time.sleep(1) logs = es.search(index=self.index_name,body="{}") message = logs['hits']['hits'][0]['_source'] self.assertIsNotNone(message['title'], "Title is empty") diff --git a/tests/StockSight/StockPriceListenerTest.py b/tests/StockSight/StockPriceListenerTest.py index 5772bd5..83caf53 100644 --- a/tests/StockSight/StockPriceListenerTest.py +++ b/tests/StockSight/StockPriceListenerTest.py @@ -1,4 +1,5 @@ import unittest +import time from StockSight.StockPriceListener import * from StockSight.EsMap.StockPrice import mapping @@ -24,6 +25,7 @@ def test_get_price(self): config['stock_price']['time_check'] = False self.mainClass.index_name = self.index_name self.mainClass.get_price(self.symbol) + time.sleep(1) logs = es.search(index=self.index_name,body="{}") message = logs['hits']['hits'][0]['_source'] self.assertIsNotNone(message['price_last'], "Price is empty") \ No newline at end of file diff --git a/tests/StockSight/YahooFinanceListenerTest.py b/tests/StockSight/YahooFinanceListenerTest.py index e39917f..08e94dc 100644 --- a/tests/StockSight/YahooFinanceListenerTest.py +++ b/tests/StockSight/YahooFinanceListenerTest.py @@ -1,6 +1,7 @@ import unittest from StockSight.YahooFinanceListener import * from 
StockSight.EsMap.Sentiment import mapping +import time class YahooFinanceListenerTest(unittest.TestCase): @@ -48,6 +49,7 @@ def test_get_news_headlines_with_body(self): def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() + time.sleep(1) logs = es.search(index=self.index_name,body="{}") message = logs['hits']['hits'][0]['_source'] self.assertIsNotNone(message['title'], "Title is empty") From 56901dc502e08526d806978c7c487baf110d20bc Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 9 Sep 2019 23:39:59 -0400 Subject: [PATCH 36/55] Kibana change Additional news scraper change --- src/StockSight/NewsHeadlineListener.py | 2 +- src/StockSight/SeekAlphaListener.py | 4 ++-- src/StockSight/YahooFinanceListener.py | 2 +- src/kibana_export/export.7.3.ndjson | 13 +++++++------ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index e695c2a..53720ba 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -120,7 +120,7 @@ def get_article_with_atag(self, raw_article, parsed_uri): #ignore 3rd party links if url_link.find('http') != -1 and url_link.find(parsed_uri) == -1 : return None - return Article(a_tag.text, url_link) + return Article(a_tag.text, self.get_proper_new_body_url(url_link,parsed_uri)) def get_proper_new_body_url(self, article_url, host): if article_url.find('http') != -1: diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index 7374f30..d80c5ad 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -24,7 +24,7 @@ def get_news_headlines(self): article = self.get_article_with_atag(rawArticle, parsed_uri) if self.can_process(article): if config['news']['follow_link']: - body_url = self.get_proper_new_body_url(article.url, parsed_uri) + body_url = article.url for p in self.get_analysis_summary(body_url): article.body += 
str(p)+" " @@ -36,7 +36,7 @@ def get_news_headlines(self): article = self.get_article_with_atag(rawArticle, parsed_uri) if self.can_process(article): if config['news']['follow_link']: - body_url = self.get_proper_new_body_url(article.url, parsed_uri) + body_url = article.url for p in self.get_news_summary(body_url): article.body += str(p)+" " diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py index 4c223bd..454e19d 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -25,7 +25,7 @@ def get_news_headlines(self): article = self.get_article_with_atag(rawArticle, parsed_uri) if self.can_process(article): if config['news']['follow_link']: - body_url = self.get_proper_new_body_url(article.url, parsed_uri) + body_url = article.url for p in self.get_page_text(body_url): article.body += str(p)+" " diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index 06b342c..37300ef 100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,6 +1,7 @@ 
-{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"sentiment\",\"type\":\"string\",\"e
sTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl_index_pattern","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-08-31T04:11:15.828Z","version":"WzcsMV0="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_polarity","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_polarity\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to 
Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":32}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"median\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"percents\":[50],\"customLabel\":\"\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}}]}"},"id":"tmpl_polarity_visual","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:17:03.654Z","version":"WzgsMV0="} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_sentinel","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_sentinel\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"isDonut\":false,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl_polarity_sentinel","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:19:36.071Z","version":"WzksMV0="} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_articles","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl_articles\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}"},"id":"tmpl_articles","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:20:24.604Z","version":"WzEwLDFd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl_titles","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"}}}}","version":1,"visState":"{\"title\":\"tmpl_titles\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":0,\"direction\":\"asc\"},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"customLabel\":\"Polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"message.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\",\"customLabel\":\"Subjectivity\"}}]}"},"id":"tmpl_titltes","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl_index_pattern","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-08-31T04:39:07.986Z","version":"WzEzLDFd"} 
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":10,\"y\":0,\"w\":14,\"h\":15,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"title\":\"Polarity\",\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":10,\"h\":15,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{\"vis\":{\"legendOpen\":false}},\"title\":\"Sentiniel\",\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":24,\"y\":0,\"w\":24,\"h\":15,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"title\":\"Article Count\",\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":0,\"y\":15,\"w\":24,\"h\":18,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"title\":\"Title Analysis\",\"panelRefName\":\"panel_3\"}]","timeRestore":false,"title":"tmpl_dashboard","version":1},"id":"tmpl_dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl_polarity_visual","name":"panel_0","type":"visualization"},{"id":"tmpl_polarity_sentinel","name":"panel_1","type":"visualization"},{"id":"tmpl_articles","name":"panel_2","type":"visualization"},{"id":"tmpl_titltes","name":"panel_3","type":"visualization"}],"type":"dashboard","updated_at":"2019-08-31T04:57:26.923Z","version":"WzI1LDFd"} \ No newline at end of file 
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_id\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":
[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"text\",\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"symbol.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"symbol\",\"subType\":\"multi\"},{\"name\":
\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"esTypes\":[\"integer\",\"long\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-10T02:41:09.242Z","version":"WzM5LDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity 
Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"enabled\":true,\"id\":\"3\",\"params\":{\"field\":\"polarity\",\"percents\":[50]},\"schema\":\"metric\",\"type\":\"median\"},{\"enabled\":true,\"id\":\"4\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"max\"},{\"enabled\":true,\"id\":\"5\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"min\"}],\"params\":{\"addLegend\":false,\"addTooltip\":true,\"dimensions\":{\"metrics\":[{\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}},\"type\":\"vis_dimension\"}]},\"metric\":{\"colorSchema\":\"Green to Red\",\"colorsRange\":[{\"from\":0,\"to\":10000,\"type\":\"range\"}],\"invertColors\":false,\"labels\":{\"show\":true},\"metricColorMode\":\"None\",\"percentageMode\":false,\"style\":{\"bgColor\":false,\"bgFill\":\"#000\",\"fontSize\":60,\"labelColor\":false,\"subText\":\"\"},\"useRanges\":false},\"type\":\"metric\"},\"title\":\"tmpl - Polarity Dashboard\",\"type\":\"metric\"}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:44:45.235Z","version":"WzQwLDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article 
List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}}]}"},"id":"tmpl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:56:23.360Z","version":"WzQzLDNd"} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T03:11:30.053Z","version":"WzQ4LDNd"} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:47:11.395Z","version":"WzQxLDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:57:39.277Z","version":"WzQ1LDNd"} 
+{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":21,\"y\":0,\"w\":15,\"h\":14,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":21,\"y\":14,\"w\":27,\"h\":20,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":21,\"h\":14,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":36,\"y\":0,\"w\":12,\"h\":14,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"gridData\":{\"x\":0,\"y\":14,\"w\":21,\"h\":20,\"i\":\"5\"},\"version\":\"7.3.1\",\"panelIndex\":\"5\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"}]","timeRestore":false,"title":"tmpl - Dashboard","version":1},"id":"tmpl-dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl-polarity-dashboard","name":"panel_0","type":"visualization"},{"id":"tmpl-article-list","name":"panel_1","type":"visualization"},{"id":"tmpl-article-imports","name":"panel_2","type":"visualization"},{"id":"tmpl-sentiment","name":"panel_3","type":"visualization"},{"id":"tmpl-stock-price","name":"panel_4","type":"visualization"}],"type":"dashboard","updated_at":"2019-09-10T03:05:01.030Z","version":"WzQ2LDNd"} \ No newline at end of file From abbc74098f2e9182e70bb1951829fd6f399cb5c9 Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 9 Sep 2019 23:43:54 -0400 Subject: [PATCH 37/55] Kibana - remove legend --- src/kibana_export/export.7.3.ndjson | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index 37300ef..c97278a 
100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,7 +1,7 @@ {"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\"
:true,\"parent\":\"msg_id\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"text\",\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"symbol.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":tru
e,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"symbol\",\"subType\":\"multi\"},{\"name\":\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"esTypes\":[\"integer\",\"long\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-10T02:41:09.242Z","version":"WzM5LDNd"} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity 
Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"enabled\":true,\"id\":\"3\",\"params\":{\"field\":\"polarity\",\"percents\":[50]},\"schema\":\"metric\",\"type\":\"median\"},{\"enabled\":true,\"id\":\"4\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"max\"},{\"enabled\":true,\"id\":\"5\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"min\"}],\"params\":{\"addLegend\":false,\"addTooltip\":true,\"dimensions\":{\"metrics\":[{\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}},\"type\":\"vis_dimension\"}]},\"metric\":{\"colorSchema\":\"Green to Red\",\"colorsRange\":[{\"from\":0,\"to\":10000,\"type\":\"range\"}],\"invertColors\":false,\"labels\":{\"show\":true},\"metricColorMode\":\"None\",\"percentageMode\":false,\"style\":{\"bgColor\":false,\"bgFill\":\"#000\",\"fontSize\":60,\"labelColor\":false,\"subText\":\"\"},\"useRanges\":false},\"type\":\"metric\"},\"title\":\"tmpl - Polarity Dashboard\",\"type\":\"metric\"}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:44:45.235Z","version":"WzQwLDNd"} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article 
List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}}]}"},"id":"tmpl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:56:23.360Z","version":"WzQzLDNd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T03:11:30.053Z","version":"WzQ4LDNd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:47:11.395Z","version":"WzQxLDNd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:57:39.277Z","version":"WzQ1LDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article 
Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":false,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T03:11:30.053Z","version":"WzQ4LDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - 
Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":false,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:47:11.395Z","version":"WzQxLDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":false,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:57:39.277Z","version":"WzQ1LDNd"} 
{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":21,\"y\":0,\"w\":15,\"h\":14,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":21,\"y\":14,\"w\":27,\"h\":20,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":21,\"h\":14,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":36,\"y\":0,\"w\":12,\"h\":14,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"gridData\":{\"x\":0,\"y\":14,\"w\":21,\"h\":20,\"i\":\"5\"},\"version\":\"7.3.1\",\"panelIndex\":\"5\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"}]","timeRestore":false,"title":"tmpl - Dashboard","version":1},"id":"tmpl-dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl-polarity-dashboard","name":"panel_0","type":"visualization"},{"id":"tmpl-article-list","name":"panel_1","type":"visualization"},{"id":"tmpl-article-imports","name":"panel_2","type":"visualization"},{"id":"tmpl-sentiment","name":"panel_3","type":"visualization"},{"id":"tmpl-stock-price","name":"panel_4","type":"visualization"}],"type":"dashboard","updated_at":"2019-09-10T03:05:01.030Z","version":"WzQ2LDNd"} \ No newline at end of file From a6002ac022c21c00007a6312daf35599a3a55a93 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 10 Sep 2019 00:29:52 -0400 Subject: [PATCH 38/55] Add kibana listener --- src/startup.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/startup.sh b/src/startup.sh index 4afa5e3..487012e 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ 
-7,7 +7,9 @@ #Wait script based on https://github.com/elastic/elasticsearch-py/issues/778#issuecomment-384389668 host='http://elasticsearch:9200' +kibanahost='http://kibana:5601/api/status' +#wait for elastic until $(curl --output /dev/null --silent --head --fail "$host"); do printf '.' sleep 5 @@ -15,7 +17,6 @@ done # First wait for ES to start... response=$(curl $host) - until [ "$response" = "200" ]; do response=$(curl --write-out %{http_code} --silent --output /dev/null "$host") >&2 echo "Elastic Search is unavailable - sleeping" @@ -34,6 +35,20 @@ until [ "$health" = 'green' ] || [ "$health" = 'yellow' ]; do sleep 5 done +# First wait for Kibana to start... +response=$(curl $kibanahost) +until [ "$response" = "200" ]; do + response=$(curl --write-out %{http_code} --silent --output /dev/null "$kibanahost") + >&2 echo "Kibana is unavailable - sleeping" + sleep 5 +done + +kibana_health="$(curl -fsSL "$kibanahost")" +while [[ "$kibana_health" == *"Kibana server is not ready yet"* ]]; do + kibana_health="$(curl -fsSL "$kibanahost")" + >&2 echo "Kibana is not ready yet." 
+ sleep 5 +done echo "Copy kibana dashboard if they don't exist"; python import.kibana.py & From c6cf17b66fbdf6c786868aa3ed57bb96b5447edb Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 10 Sep 2019 00:41:53 -0400 Subject: [PATCH 39/55] Revert ndjson --- src/kibana_export/export.7.3.ndjson | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index c97278a..37300ef 100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,7 +1,7 @@ {"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripte
d\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_id\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"s
ubjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"text\",\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"symbol.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"symbol\",\"subType\":\"multi\"},{\"name\":\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"esTypes\":[\"integer\",\"long\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-10T02:41:09.242Z","version":"WzM5LDNd"} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity 
Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"enabled\":true,\"id\":\"3\",\"params\":{\"field\":\"polarity\",\"percents\":[50]},\"schema\":\"metric\",\"type\":\"median\"},{\"enabled\":true,\"id\":\"4\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"max\"},{\"enabled\":true,\"id\":\"5\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"min\"}],\"params\":{\"addLegend\":false,\"addTooltip\":true,\"dimensions\":{\"metrics\":[{\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}},\"type\":\"vis_dimension\"}]},\"metric\":{\"colorSchema\":\"Green to Red\",\"colorsRange\":[{\"from\":0,\"to\":10000,\"type\":\"range\"}],\"invertColors\":false,\"labels\":{\"show\":true},\"metricColorMode\":\"None\",\"percentageMode\":false,\"style\":{\"bgColor\":false,\"bgFill\":\"#000\",\"fontSize\":60,\"labelColor\":false,\"subText\":\"\"},\"useRanges\":false},\"type\":\"metric\"},\"title\":\"tmpl - Polarity Dashboard\",\"type\":\"metric\"}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:44:45.235Z","version":"WzQwLDNd"} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article 
List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}}]}"},"id":"tmpl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:56:23.360Z","version":"WzQzLDNd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":false,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T03:11:30.053Z","version":"WzQ4LDNd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":false,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:47:11.395Z","version":"WzQxLDNd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":false,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:57:39.277Z","version":"WzQ1LDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article 
Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T03:11:30.053Z","version":"WzQ4LDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - 
Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:47:11.395Z","version":"WzQxLDNd"} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:57:39.277Z","version":"WzQ1LDNd"} 
{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":21,\"y\":0,\"w\":15,\"h\":14,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":21,\"y\":14,\"w\":27,\"h\":20,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":21,\"h\":14,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":36,\"y\":0,\"w\":12,\"h\":14,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"gridData\":{\"x\":0,\"y\":14,\"w\":21,\"h\":20,\"i\":\"5\"},\"version\":\"7.3.1\",\"panelIndex\":\"5\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"}]","timeRestore":false,"title":"tmpl - Dashboard","version":1},"id":"tmpl-dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl-polarity-dashboard","name":"panel_0","type":"visualization"},{"id":"tmpl-article-list","name":"panel_1","type":"visualization"},{"id":"tmpl-article-imports","name":"panel_2","type":"visualization"},{"id":"tmpl-sentiment","name":"panel_3","type":"visualization"},{"id":"tmpl-stock-price","name":"panel_4","type":"visualization"}],"type":"dashboard","updated_at":"2019-09-10T03:05:01.030Z","version":"WzQ2LDNd"} \ No newline at end of file From bda22a4fbe634989ca6d849845bfe07aeece4212 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 10 Sep 2019 17:31:01 -0400 Subject: [PATCH 40/55] Attempt to fix stock price operant error --- src/StockSight/StockPriceListener.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/StockSight/StockPriceListener.py 
b/src/StockSight/StockPriceListener.py index 9b1e3f2..c0f81c3 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -66,12 +66,17 @@ def get_price(self, symbol): D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) try: D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - - data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ - data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 + data['chart']['result'][0]['indicators']['quote'][0]['open'][-1]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['open'][-1] * 100 except TypeError: - D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - - data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ - data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 + if data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] is not None and \ + data['chart']['result'][0]['indicators']['quote'][0]['open'][-2] is not None: + D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - + data['chart']['result'][0]['indicators']['quote'][0]['open'][-2]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['open'][-2] * 100 + else: + D['change'] = 0 + pass D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] if D['high'] is None: From b7226d47896238fa15d2e6574f603a006a0e3bb2 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 10 Sep 2019 21:55:48 -0400 Subject: [PATCH 41/55] Fix elastic mapping Fix SeekingAlpha message handler --- src/StockSight/EsMap/Sentiment.py | 46 ++++++++++++++++++--- src/StockSight/Initializer/ElasticSearch.py | 5 +-- src/StockSight/SeekAlphaListener.py | 8 ++-- src/StockSight/YahooFinanceListener.py | 4 +- src/stockprice.py | 2 +- tests/StockSight/SeekAlphaListenerTest.py | 16 ++++--- 6 files changed, 60 insertions(+), 21 deletions(-) diff --git 
a/src/StockSight/EsMap/Sentiment.py b/src/StockSight/EsMap/Sentiment.py index afe5e17..7bb294b 100644 --- a/src/StockSight/EsMap/Sentiment.py +++ b/src/StockSight/EsMap/Sentiment.py @@ -3,28 +3,58 @@ "mappings": { "properties": { "author": { - "type": "keyword", + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "referer_url": { - "type": "keyword", + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "url": { "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "location": { - "type": "keyword", + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "date": { "type": "date" }, "title": { "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "message": { "type": "text", }, "msg_id": { - "type": "keyword" + "type": "keyword", }, "polarity": { "type": "float" @@ -33,7 +63,13 @@ "type": "float" }, "sentiment": { - "type": "keyword", + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } } } }, diff --git a/src/StockSight/Initializer/ElasticSearch.py b/src/StockSight/Initializer/ElasticSearch.py index 45b4f28..5e04970 100644 --- a/src/StockSight/Initializer/ElasticSearch.py +++ b/src/StockSight/Initializer/ElasticSearch.py @@ -11,10 +11,7 @@ LICENSE for the full license text. 
""" -try: - from elasticsearch5 import Elasticsearch -except ImportError: - from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from StockSight.Initializer.ConfigReader import config diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index d80c5ad..270efb2 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -61,8 +61,8 @@ def get_news_summary(self, url): if html_p: for i in html_p: - if i.string is not None: - yield i.string + if i.text is not None: + yield i.text else: break @@ -79,8 +79,8 @@ def get_analysis_summary(self, url): if html_p: for i in html_p: - if i.string is not None: - yield i.string + if i.text is not None: + yield i.text else: break diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py index 454e19d..0ec5e0a 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -50,8 +50,8 @@ def get_page_text(self, url): n = 1 for i in html_p: if n <= max_paragraphs: - if i.string is not None: - yield i.string + if i.text is not None: + yield i.text else: break n += 1 diff --git a/src/stockprice.py b/src/stockprice.py index 10d5c4d..1758836 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -82,7 +82,7 @@ for symbol in config['tickers']: try: logger.info('Creating new Price index or using existing ' + symbol) - es.indices.create(index=config['elasticsearch']['table_prefix']['sentiment']+symbol.lower(), + es.indices.create(index=config['elasticsearch']['table_prefix']['price']+symbol.lower(), body=mapping, ignore=[400, 404]) stockprice = StockPriceListener() diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 9dc1798..438a029 100644 --- a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -41,15 +41,21 @@ def test_get_news_headlines_with_body(self): 
self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") - hasBody = False + has_article_body = False + has_news_body = False for message in headlines: if message.body == '': continue - else: - hasBody = True - break + elif message.url.find('article') > -1: + has_article_body = True + elif message.url.find('news') > -1: + has_news_body = True - self.assertEqual(hasBody, True, "Body is empty") + if has_article_body and has_news_body: + break; + + self.assertEqual(has_news_body, True, "News body is empty") + self.assertEqual(has_article_body, True, "Article body is empty") def test_execute(self): From 5cde9c9c6db6be72da6c5fce39f8896955eed816 Mon Sep 17 00:00:00 2001 From: heyqule Date: Wed, 11 Sep 2019 00:03:55 -0400 Subject: [PATCH 42/55] Add delay for Seek Alpha --- src/StockSight/NewsHeadlineListener.py | 1 - src/StockSight/SeekAlphaListener.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 53720ba..da50f08 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -10,7 +10,6 @@ stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. 
""" -import hashlib import re from datetime import datetime diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index 270efb2..a7b408e 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -1,10 +1,11 @@ from StockSight.NewsHeadlineListener import * - +import time class SeekAlphaListener(NewsHeadlineListener): def __init__(self, symbol): super(SeekAlphaListener, self)\ .__init__("Seek Alpha", symbol, "https://seekingalpha.com/symbol/%s" % symbol) + self.delay = 10 def get_news_headlines(self): @@ -53,6 +54,7 @@ def get_page_text(self, url): pass def get_news_summary(self, url): + time.sleep(5) try: req = requests.get(url) html = req.text @@ -71,6 +73,7 @@ def get_news_summary(self, url): pass def get_analysis_summary(self, url): + time.sleep(5) try: req = requests.get(str(url)) html = req.text From c3431c4d5d773b7d1390c042845ead68fedd19de Mon Sep 17 00:00:00 2001 From: heyqule Date: Wed, 11 Sep 2019 00:05:41 -0400 Subject: [PATCH 43/55] Add delay for Seek Alpha --- src/StockSight/SeekAlphaListener.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index a7b408e..b438113 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -54,7 +54,7 @@ def get_page_text(self, url): pass def get_news_summary(self, url): - time.sleep(5) + time.sleep(self.delay) try: req = requests.get(url) html = req.text @@ -73,7 +73,7 @@ def get_news_summary(self, url): pass def get_analysis_summary(self, url): - time.sleep(5) + time.sleep(self.delay) try: req = requests.get(str(url)) html = req.text From d086bc65cdc2ea1c88649c3cb44468dbabe34507 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sat, 14 Sep 2019 16:16:22 -0400 Subject: [PATCH 44/55] - Separate sentiment for message and title - Move interval ticker to environment variable --- docker-compose.yml | 9 ++++ 
python-docker/requirements.txt | 3 +- src/StockSight/EsMap/Sentiment.py | 15 ++++++ src/StockSight/EsMap/StockPrice.py | 6 +++ src/StockSight/Helper/RequestRandomizer.py | 51 ++++++++++++++++++ src/StockSight/Helper/Sentiment.py | 10 ++-- src/StockSight/NewsHeadlineListener.py | 18 ++++++- src/StockSight/SeekAlphaListener.py | 53 ++++++------------- src/StockSight/StockPriceListener.py | 17 +++++- src/StockSight/YahooFinanceListener.py | 11 ++-- src/startup.sh | 10 ++-- .../Helper/RequestRandomizerTest.py | 31 +++++++++++ tests/StockSight/SeekAlphaListenerTest.py | 19 +++---- 13 files changed, 183 insertions(+), 70 deletions(-) create mode 100644 src/StockSight/Helper/RequestRandomizer.py create mode 100644 tests/StockSight/Helper/RequestRandomizerTest.py diff --git a/docker-compose.yml b/docker-compose.yml index e97029d..231f7e1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,23 +26,32 @@ services: #expose this port for local dev only! ports: - "9200:9200" + restart: unless-stopped redis: build: context: ./redis-docker #expose this port for local dev only! 
ports: - "6379:6379" + restart: unless-stopped kibana: image: docker.elastic.co/kibana/kibana:7.3.1 depends_on: - elasticsearch ports: - "5601:5601" + restart: unless-stopped python3: build: context: ./python-docker + environment: + #interval for getting stock price (in seconds) + - tick_time=900 + #interval for getting stock news (tick_time * news_cycle) + - news_cycle=4 depends_on: - elasticsearch - redis volumes: - ./src:/usr/src/app + restart: unless-stopped diff --git a/python-docker/requirements.txt b/python-docker/requirements.txt index 41882af..01364dd 100644 --- a/python-docker/requirements.txt +++ b/python-docker/requirements.txt @@ -7,4 +7,5 @@ textblob vaderSentiment pytz redis -pyyaml \ No newline at end of file +pyyaml +fake-useragent \ No newline at end of file diff --git a/src/StockSight/EsMap/Sentiment.py b/src/StockSight/EsMap/Sentiment.py index 7bb294b..1932922 100644 --- a/src/StockSight/EsMap/Sentiment.py +++ b/src/StockSight/EsMap/Sentiment.py @@ -70,6 +70,21 @@ "ignore_above": 256 } } + }, + "msg_polarity": { + "type": "float" + }, + "msg_subjectivity": { + "type": "float" + }, + "msg_sentiment": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } } } }, diff --git a/src/StockSight/EsMap/StockPrice.py b/src/StockSight/EsMap/StockPrice.py index 3f9a017..033c65c 100644 --- a/src/StockSight/EsMap/StockPrice.py +++ b/src/StockSight/EsMap/StockPrice.py @@ -20,6 +20,12 @@ "price_low": { "type": "float" }, + "price_open": { + "type": "float" + }, + "price_close": { + "type": "float" + }, "vol": { "type": "integer" } diff --git a/src/StockSight/Helper/RequestRandomizer.py b/src/StockSight/Helper/RequestRandomizer.py new file mode 100644 index 0000000..eaa17b8 --- /dev/null +++ b/src/StockSight/Helper/RequestRandomizer.py @@ -0,0 +1,51 @@ +# implement random proxy + +#https://github.com/hellysmile/fake-useragent implement random user-agent + +from fake_useragent import UserAgent +import requests +from 
random import randint + +from StockSight.Initializer.Redis import rds + +class RequestRandomizer: + + @staticmethod + def get_a_proxy(): + ip_list = RequestRandomizer.get_raw_ip_list() + + ip_list_length = len(ip_list) + if ip_list_length is 0: + raise ProxyIpNotFound + + current_location = randint(0, ip_list_length-2); + selected_ip_origin = ip_list[current_location] + selected_ip = selected_ip_origin.strip() + + return {'https': selected_ip, 'http': selected_ip} + + @classmethod + def get_raw_ip_list(cls): + proxies_list = rds.get('proxy_list') + if proxies_list is None: + proxies_list_response = requests.get('https://www.proxy-list.download/api/v1/get?type=https&anon=elite&country=US') + proxies_list = proxies_list_response.text + + ip_list = proxies_list.split("\r\n") + + rds.set('proxy_list', proxies_list, 86400) + else: + ip_list = proxies_list.decode().split("\r\n") + + return ip_list + + + @staticmethod + def get_a_user_agent(): + ua = UserAgent() + return ua.firefox + + +class ProxyIpNotFound(Exception): + """Raised when the PROXY IP are not functional""" + pass \ No newline at end of file diff --git a/src/StockSight/Helper/Sentiment.py b/src/StockSight/Helper/Sentiment.py index 1b4ca6c..5621c18 100644 --- a/src/StockSight/Helper/Sentiment.py +++ b/src/StockSight/Helper/Sentiment.py @@ -57,7 +57,7 @@ def get_sentiment_from_url(text, sentiment_url): return sentiment -def sentiment_analysis(text): +def sentiment_analysis(text, online = False): """Determine if sentiment is positive, negative, or neutral algorithm to figure out if sentiment is positive, negative or neutral uses sentiment polarity from TextBlob, VADER Sentiment and @@ -65,9 +65,11 @@ def sentiment_analysis(text): could be made better :) :param text: """ - sentiment_url = 'http://text-processing.com/api/sentiment/' - # pass text into sentiment url - sentiment_url = get_sentiment_from_url(text, sentiment_url) + sentiment_url = None; + if online == True: + sentiment_url = 
'http://text-processing.com/api/sentiment/' + # pass text into sentiment url + sentiment_url = get_sentiment_from_url(text, sentiment_url) # pass text into TextBlob text_tb = TextBlob(text) diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index da50f08..0b4a08b 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -12,6 +12,8 @@ """ import re from datetime import datetime +import time +from random import randint import nltk @@ -29,6 +31,7 @@ from StockSight.Model.Article import * + class NewsHeadlineListener(ABC): def __init__(self, news_type, symbol, url=None): self.symbol = symbol @@ -83,7 +86,8 @@ def execute(self): continue # get sentiment values - polarity, subjectivity, sentiment = sentiment_analysis(article_obj.title + "/n" + article_obj.body) + polarity, subjectivity, sentiment = sentiment_analysis(article_obj.title, True) + msg_polarity, msg_subjectivity, msg_sentiment = sentiment_analysis(article_obj.body) logger.info("Adding news headline to elasticsearch") # add news headline data and sentiment info to elasticsearch @@ -98,7 +102,10 @@ def execute(self): "message": article_obj.body, "polarity": polarity, "subjectivity": subjectivity, - "sentiment": sentiment + "sentiment": sentiment, + "msg_polarity": msg_polarity, + "msg_subjectivity": msg_subjectivity, + "msg_sentiment": msg_sentiment }) rds.set(article_obj.msg_id, 1, self.cache_length) @@ -130,3 +137,10 @@ def get_proper_new_body_url(self, article_url, host): def can_process(self, article): return article is not None and rds.exists(article.msg_id) is 0 + + def get_soup(self, url): + time.sleep(randint(1,3)) + req = requests.get(self.url) + html = req.text + soup = BeautifulSoup(html, 'html.parser') + return soup diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index b438113..782e80f 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ 
-1,11 +1,11 @@ from StockSight.NewsHeadlineListener import * import time +from random import randint class SeekAlphaListener(NewsHeadlineListener): def __init__(self, symbol): super(SeekAlphaListener, self)\ .__init__("Seek Alpha", symbol, "https://seekingalpha.com/symbol/%s" % symbol) - self.delay = 10 def get_news_headlines(self): @@ -14,20 +14,20 @@ def get_news_headlines(self): parsed_uri = urlparse.urljoin(self.url, '/') try: - req = requests.get(self.url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') + soup = self.get_soup(self.url) analysis = soup.select('div.analysis div.symbol_article') news = soup.select('div.news div.symbol_article') if analysis: + for rawArticle in analysis: article = self.get_article_with_atag(rawArticle, parsed_uri) if self.can_process(article): - if config['news']['follow_link']: - body_url = article.url - for p in self.get_analysis_summary(body_url): - article.body += str(p)+" " + + # if config['news']['follow_link']: + # body_url = article.url + # for p in self.get_analysis_summary(body_url, 'p.bullets_li'): + # article.body += str(p)+" " article.referer_url = self.url articles.append(article) @@ -36,10 +36,11 @@ def get_news_headlines(self): for rawArticle in news: article = self.get_article_with_atag(rawArticle, parsed_uri) if self.can_process(article): - if config['news']['follow_link']: - body_url = article.url - for p in self.get_news_summary(body_url): - article.body += str(p)+" " + + # if config['news']['follow_link']: + # body_url = article.url + # for p in self.get_summary(body_url, 'div.a-sum p'): + # article.body += str(p)+" " article.referer_url = self.url articles.append(article) @@ -53,32 +54,10 @@ def get_news_headlines(self): def get_page_text(self, url): pass - def get_news_summary(self, url): - time.sleep(self.delay) - try: - req = requests.get(url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') - html_p = soup.select('p.bullets_li') - - if html_p: - for i in html_p: - if i.text is 
not None: - yield i.text - else: - break - - except requests.exceptions.RequestException as exce: - logger.warning("Exception: can't crawl web site (%s)" % exce) - pass - - def get_analysis_summary(self, url): - time.sleep(self.delay) + def get_summary(self, url, selector): try: - req = requests.get(str(url)) - html = req.text - soup = BeautifulSoup(html, 'html.parser') - html_p = soup.select('div.a-sum p') + soup = self.get_soup(url) + html_p = soup.select(selector) if html_p: for i in html_p: diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index c0f81c3..3b2ced5 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -14,6 +14,7 @@ import datetime import re import requests +import os from pytz import timezone from StockSight.Initializer.ConfigReader import * @@ -34,7 +35,8 @@ def get_price(self, symbol): if self.index_name is None: self.index_name = config['elasticsearch']['table_prefix']['price']+symbol.lower() - url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=5d&corsDomain=finance.yahoo.com&.tsrc=finance" + tick_time = int(os.getenv('tick_time', 900)) / 60 + url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=%sm&range=2d&corsDomain=finance.yahoo.com&.tsrc=finance" % tick_time current_timezone = timezone(config['stock_price']['timezone_str']) @@ -78,12 +80,23 @@ def get_price(self, symbol): D['change'] = 0 pass + D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] if D['high'] is None: D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-2] + D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-1] if D['low'] is None: D['low'] = data['chart']['result'][0]['indicators']['quote'][0]['low'][-2] + + D['open'] = data['chart']['result'][0]['indicators']['quote'][0]['open'][-1] + if 
D['open'] is None: + D['open'] = data['chart']['result'][0]['indicators']['quote'][0]['open'][-2] + + D['close'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] + if D['close'] is None: + D['close'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] + D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-1] if D['vol'] is None: D['vol'] = data['chart']['result'][0]['indicators']['quote'][0]['volume'][-2] @@ -103,6 +116,8 @@ def get_price(self, symbol): "change": D['change'], "price_high": D['high'], "price_low": D['low'], + "price_open": D['open'], + "price_close": D['close'], "vol": D['vol'] }) else: diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py index 0ec5e0a..e492f47 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -1,4 +1,6 @@ from StockSight.NewsHeadlineListener import * +import time +from random import randint class YahooFinanceListener(NewsHeadlineListener): @@ -13,10 +15,7 @@ def get_news_headlines(self): parsed_uri = urlparse.urljoin(self.url, '/') try: - - req = requests.get(self.url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') + soup = self.get_soup(self.url) html = soup.findAll('h3') if html: @@ -41,9 +40,7 @@ def get_news_headlines(self): def get_page_text(self, url): max_paragraphs = 5 try: - req = requests.get(url) - html = req.text - soup = BeautifulSoup(html, 'html.parser') + soup = self.get_soup(url) html_p = soup.findAll('p') if html_p: diff --git a/src/startup.sh b/src/startup.sh index 487012e..0e310cd 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -15,7 +15,7 @@ until $(curl --output /dev/null --silent --head --fail "$host"); do sleep 5 done -# First wait for ES to start... +# wait for ES to start... 
response=$(curl $host) until [ "$response" = "200" ]; do response=$(curl --write-out %{http_code} --silent --output /dev/null "$host") @@ -35,7 +35,7 @@ until [ "$health" = 'green' ] || [ "$health" = 'yellow' ]; do sleep 5 done -# First wait for Kibana to start... +# wait for Kibana to start... response=$(curl $kibanahost) until [ "$response" = "200" ]; do response=$(curl --write-out %{http_code} --silent --output /dev/null "$kibanahost") @@ -50,12 +50,12 @@ while [[ "$kibana_health" == *"Kibana server is not ready yet"* ]]; do sleep 5 done +#Copy kibana dashboards echo "Copy kibana dashboard if they don't exist"; python import.kibana.py & -tick_time=900 tick=0 -let sentiment_time=900*4 +let sentiment_time=$tick_time*$news_cycle #echo "Spawning Tweet Sentiment receiver instance"; #python tweet.sentiment.py & @@ -65,7 +65,7 @@ do echo "Spawning stock price receiver instance"; python stockprice.py & echo "Will get stock data again in ${tick_time} sec..."; - let tick_mod=tick%4 + let tick_mod=tick%$news_cycle if [ $tick_mod -eq 0 ] then diff --git a/tests/StockSight/Helper/RequestRandomizerTest.py b/tests/StockSight/Helper/RequestRandomizerTest.py new file mode 100644 index 0000000..190d381 --- /dev/null +++ b/tests/StockSight/Helper/RequestRandomizerTest.py @@ -0,0 +1,31 @@ +import unittest +from StockSight.Helper.RequestRandomizer import RequestRandomizer +from StockSight.Initializer.ConfigReader import config +from StockSight.Initializer.Redis import rds + +class RequestRandomizerTest(unittest.TestCase): + + def setUp(self): + config['redis']['db'] = 1 + + + def tearDown(self): + rds.flushdb() + + def test_get_a_proxy(self): + proxy_ip = RequestRandomizer.get_a_proxy() + self.assertIn(':', proxy_ip, 'Invalid IP format') + + def test_get_a_proxy_reliability(self): + for x in range(10): + proxy_ip = RequestRandomizer.get_a_proxy() + self.assertIn(':', proxy_ip, ('Invalid IP format at %s' % x)) + + def test_get_a_user_agent(self): + ua = 
RequestRandomizer.get_a_user_agent() + self.assertIn('Firefox', ua, ('User agent isn\'t firefox')) + + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 438a029..52d7112 100644 --- a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -6,7 +6,7 @@ class SeekAlphaListenerTest(unittest.TestCase): - symbol = 'tsla' + symbol = 'amd' def setUp(self): config['redis']['db'] = 1 @@ -33,6 +33,7 @@ def test_get_news_headlines(self): self.assertIsNotNone(headlines[0].url, "URL is empty") self.assertIsNotNone(headlines[0].referer_url, "Refer URL is empty") + #always fails b/c of 403. def test_get_news_headlines_with_body(self): config['news']['follow_link'] = True headlines = self.mainClass.get_news_headlines() @@ -41,21 +42,13 @@ def test_get_news_headlines_with_body(self): self.assertIsNotNone(headlines[0].title, "Title is empty") self.assertIsNotNone(headlines[0].url, "URL is empty") self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") - has_article_body = False - has_news_body = False + + empty_bodies = 0 for message in headlines: if message.body == '': - continue - elif message.url.find('article') > -1: - has_article_body = True - elif message.url.find('news') > -1: - has_news_body = True - - if has_article_body and has_news_body: - break; + empty_bodies += 1 - self.assertEqual(has_news_body, True, "News body is empty") - self.assertEqual(has_article_body, True, "Article body is empty") + self.assertAlmostEqual(empty_bodies, 0, None, "There are %s empty bodies in %s headlines" % (empty_bodies, headlines.__len__()), 5) def test_execute(self): From f84a379e6d7b3647987af4701034c7a854fd9c81 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sat, 14 Sep 2019 16:47:49 -0400 Subject: [PATCH 45/55] - Kibana adjustment --- src/StockSight/StockPriceListener.py | 3 ++- 
src/kibana_export/export.7.3.ndjson | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index 3b2ced5..aeb0032 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -105,7 +105,8 @@ def get_price(self, symbol): raise # check before adding to ES - if D['last'] is not None and D['high'] is not None and D['low'] is not None: + if D['last'] is not None and D['high'] is not None and D['low'] is not None \ + and D['open'] is not None and D['close'] is not None: logger.info("Adding stock data to Elasticsearch...") # add stock price info to elasticsearch es.index(index=self.index_name, diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index 37300ef..74c36ea 100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,4 +1,4 @@ -{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date
\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"message.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"message\",\"subType\":\"multi\"},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_id\",\"subType\":\"multi\"},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"stri
ng\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"text\",\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"symbol.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"symbol\",\"subType\":\"multi\"},{\"name\":\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"esTypes\":[\"integer\",\"long\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"typ
e":"index-pattern","updated_at":"2019-09-10T02:41:09.242Z","version":"WzM5LDNd"} +{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"author\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"author.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"author\",\"subType\":\"multi\"},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"locat
ion\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_sentiment\",\"subType\":\"multi\"},{\"name\":\"msg_subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_close\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_open\",\"type
\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"p
arent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"esTypes\":[\"integer\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-14T20:37:32.695Z","version":"Wzk2LDJd"} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"enabled\":true,\"id\":\"3\",\"params\":{\"field\":\"polarity\",\"percents\":[50]},\"schema\":\"metric\",\"type\":\"median\"},{\"enabled\":true,\"id\":\"4\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"max\"},{\"enabled\":true,\"id\":\"5\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"min\"}],\"params\":{\"addLegend\":false,\"addTooltip\":true,\"dimensions\":{\"metrics\":[{\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}},\"type\":\"vis_dimension\"}]},\"metric\":{\"colorSchema\":\"Green to Red\",\"colorsRange\":[{\"from\":0,\"to\":10000,\"type\":\"range\"}],\"invertColors\":false,\"labels\":{\"show\":true},\"metricColorMode\":\"None\",\"percentageMode\":false,\"style\":{\"bgColor\":false,\"bgFill\":\"#000\",\"fontSize\":60,\"labelColor\":false,\"subText\":\"\"},\"useRanges\":false},\"type\":\"metric\"},\"title\":\"tmpl - Polarity 
Dashboard\",\"type\":\"metric\"}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:44:45.235Z","version":"WzQwLDNd"} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\
"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}}]}"},"id":"tmpl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:56:23.360Z","version":"WzQzLDNd"} {"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\"
,\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T03:11:30.053Z","version":"WzQ4LDNd"} From 60e06fcbb4e365cfe3272fb416bf73e08b46dbc5 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sat, 14 Sep 2019 18:17:10 -0400 Subject: [PATCH 46/55] - Config adjustment - Fix Readme - add env_var to allow overwrite of kibana dashboard --- README.md | 71 +++++++++++++++----------- docker-compose.yml | 8 +-- src/StockSight/NewsHeadlineListener.py | 4 +- src/StockSight/StockPriceListener.py | 2 +- src/StockSight/TweetListener.py | 4 +- src/config.sample.yml | 2 +- src/import.kibana.py | 13 +++-- src/news.sentiment.py | 4 +- src/stockprice.py | 2 +- src/tweet.sentiment.py | 2 +- 10 files changed, 66 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index c651f9a..bc6894f 100644 --- a/README.md +++ b/README.md @@ -6,53 +6,66 @@ Crowd-sourced stock analyzer and stock predictor using Elasticsearch, Twitter, N [![License](https://img.shields.io/github/license/shirosaidev/stocksight.svg?label=License&maxAge=86400)](./LICENSE) [![Release](https://img.shields.io/github/release/shirosaidev/stocksight.svg?label=Release&maxAge=60)](https://github.com/shirosaidev/stocksight/releases/latest) -Original Author (Chris Park) -[![Sponsor Patreon](https://img.shields.io/badge/Sponsor%20%24-Patreon-brightgreen.svg)](https://www.patreon.com/shirosaidev) -[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=CLF223XAS4W72) - -Docker and new features author 
(Allen Jian Feng Xie) -[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/paypalme2/heyqule) - - ## About stocksight is a crowd-sourced stock analysis open source software that uses Elasticsearch to store Twitter and news headlines data for stocks. stocksight analyzes the emotions of what the author writes and does sentiment analysis on the text to determine how the author "feels" about a stock. stocksight makes an aggregated analysis of all collected data from all sources. Each user running stocksight has a unique fingerprint: specific stocks they are following, news sites and twitter users they follow to find information for those stocks. This creates a unique sentiment analysis for each user, based on what data sources they are getting stocksight to search. Users can have the same stocks, but their data sources could vary significantly creating different sentiment analysis for the same stock. stocksight website will allow each user to see other sentiment analysis results from other stocksight user app results and a combined aggregated view of all. -## Requirements -- Python 3. (tested with Python 3.6.8 and 3.7.4) -- Elasticsearch 7.3.1. -- Kibana 7.3.1. 
-- elasticsearch python module -- nltk python module -- requests python module -- tweepy python module -- beautifulsoup4 python module -- textblob python module -- vaderSentiment python module -- pytz -- redis -- pyyaml - ### Download ```shell -$ git clone https://github.com/shirosaidev/stocksight.git +$ git clone https://github.com/heyqule/stocksight.git $ cd stocksight ``` [Download latest version](https://github.com/shirosaidev/stocksight/releases/latest) ### How to setup -- Copy config.sample.py to config.py -- Change the setting in config.py to fit your need -- run "docker-compose up" +- Copy src/config.yml to src/config.yml +- Change settings in config.yml to fit your need + - Change ElasticSearch credential (elasticuser:user) + - Change analyzer ignore words (sentiment_analyzer:ignore_words) + - If you want to run twitter analyzer, change the setting in twitter section + - Uncomment ""#python tweet.sentiment.py &" in src/startup.sh + - Add desired stock symbol and require words to symbols section (symbol: tsla) +- Change run interval in docker-composer.yml + - default, 900 seconds for stock price, 3600 seconds for news crawler +- Run "docker-compose up" - ??? - Profit ### How to use The following action require to run in to the python3 container. + ###### Delete Elastic Indexes +- Log into python docker console +- Run "python delindex.py --delindex {index_name}" + +###### Update Kibana Dashboard Template +- make change to your existing template and visualizations. +- export them to kibana_export/export.7.3.ndjson +- replace symbol with "tmpl" or change the id and index value to match existing ndjson. 
+- run "KIBANA_OVERWRITE=true python import.kibana.py" + -###### Update twitteruserid.txt +### Authors +Original Author (Chris Park) +[![Sponsor Patreon](https://img.shields.io/badge/Sponsor%20%24-Patreon-brightgreen.svg)](https://www.patreon.com/shirosaidev) +[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=CLF223XAS4W72) -###### Update Kibana Dashboard \ No newline at end of file +Docker and new features author (Allen Jian Feng Xie) +[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/paypalme2/heyqule) + +### Tech Stack +- Python 3. (tested with Python 3.6.8 and 3.7.4) +- Elasticsearch 7.3.1. +- Kibana 7.3.1. +- elasticsearch python module +- nltk python module +- requests python module +- tweepy python module +- beautifulsoup4 python module +- textblob python module +- vaderSentiment python module +- pytz +- redis +- pyyaml diff --git a/docker-compose.yml b/docker-compose.yml index 231f7e1..4070ab4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,15 +24,15 @@ services: soft: 4096 hard: 4096 #expose this port for local dev only! - ports: - - "9200:9200" + #ports: + # - "9200:9200" restart: unless-stopped redis: build: context: ./redis-docker #expose this port for local dev only! 
- ports: - - "6379:6379" + #ports: + # - "6379:6379" restart: unless-stopped kibana: image: docker.elastic.co/kibana/kibana:7.3.1 diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 0b4a08b..1898678 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -70,8 +70,8 @@ def execute(self): continue nltk_tokens = [] - if self.symbol in config['tickers']: - nltk_tokens = config['tickers'][self.symbol] + if self.symbol in config['symbols']: + nltk_tokens = config['symbols'][self.symbol] # check required tokens from config tokenspass = False diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index aeb0032..d0ef18e 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -35,7 +35,7 @@ def get_price(self, symbol): if self.index_name is None: self.index_name = config['elasticsearch']['table_prefix']['price']+symbol.lower() - tick_time = int(os.getenv('tick_time', 900)) / 60 + tick_time = int(int(os.getenv('tick_time', 900)) / 60) url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=%sm&range=2d&corsDomain=finance.yahoo.com&.tsrc=finance" % tick_time current_timezone = timezone(config['stock_price']['timezone_str']) diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index eb55811..c438630 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -113,9 +113,9 @@ def on_data(self, data): return True # check required tokens from config tokenspass = False - for key in config['tickers']: + for key in config['symbols']: self.symbol = key - for t in config['tickers'][key]: + for t in config['symbols'][key]: if t in tokens: tokenspass = True break diff --git a/src/config.sample.yml b/src/config.sample.yml index a73b44a..65d9910 100644 --- a/src/config.sample.yml +++ b/src/config.sample.yml @@ -46,7 
+46,7 @@ stock_price: hour_end : 17 timezone_str : America/Toronto -tickers: +symbols: tsla: - tesla - tsla diff --git a/src/import.kibana.py b/src/import.kibana.py index 92afc62..e8b649a 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -11,6 +11,7 @@ """ import requests import sys +import os from StockSight.Initializer.ConfigReader import * if __name__ == '__main__': @@ -20,7 +21,7 @@ import_template = template_file.read() template_file.close() - for symbol in config['tickers']: + for symbol in config['symbols']: try: print("Starting %s Kibana Dashboard Import" % symbol) ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' @@ -30,9 +31,15 @@ ndjson_file.close() kibana_import_url = 'http://kibana:5601/api/saved_objects/_import' - payload = {'overwrite': 'false'} + + overwrite = os.getenv('KIBANA_OVERWRITE', False) + if overwrite is False: + payload = {} + else: + payload = {'overwrite': 'true'} + headers = {'kbn-xsrf': 'True'} - post = requests.request('POST', kibana_import_url, headers=headers, files={'file': open(ndjson_file_path, "rt", encoding='utf-8')}) + post = requests.request('POST', kibana_import_url, params=payload, headers=headers, files={'file': open(ndjson_file_path, "rt", encoding='utf-8')}) print("Imported %s Kibana Dashboard" % symbol) print(ndjson_file_path) print(post.text) diff --git a/src/news.sentiment.py b/src/news.sentiment.py index d006769..9fc626d 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -54,12 +54,12 @@ requestslogger.disabled = True try: - for symbol in config['tickers']: + for symbol in config['symbols']: try: logger.info('Creating new Sentiment index or using existing ' + symbol) es.indices.create(index=config['elasticsearch']['table_prefix']['sentiment']+symbol.lower(), body=mapping, ignore=[400, 404]) - logger.info('NLTK tokens required: ' + str(config['tickers'][symbol])) + logger.info('NLTK tokens required: ' + str(config['symbols'][symbol])) logger.info('NLTK tokens ignored: ' + 
str(config['sentiment_analyzer']['ignore_words'])) yahooListener = YahooFinanceListener(symbol) diff --git a/src/stockprice.py b/src/stockprice.py index 1758836..782c4de 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -79,7 +79,7 @@ requestslogger.disabled = True try: - for symbol in config['tickers']: + for symbol in config['symbols']: try: logger.info('Creating new Price index or using existing ' + symbol) es.indices.create(index=config['elasticsearch']['table_prefix']['price']+symbol.lower(), diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py index 207a5f8..ef683d5 100644 --- a/src/tweet.sentiment.py +++ b/src/tweet.sentiment.py @@ -126,7 +126,7 @@ raise # search twitter for keywords - logger.info('NLTK tokens required: ' + str(config['tickers'])) + logger.info('NLTK tokens required: ' + str(config['symbols'])) logger.info('NLTK tokens ignored: ' + str(config['sentiment_analyzer']['ignore_words'])) logger.info('Twitter Feeds: ' + str(twitter_feeds)) logger.info('Twitter User Ids: ' + str(useridlist)) From 0d7c7a4c104ff84343ed6342767a764c9e689e84 Mon Sep 17 00:00:00 2001 From: heyqule Date: Tue, 17 Sep 2019 01:42:13 -0400 Subject: [PATCH 47/55] - Improve Kibana dashboard - - Fix copyright --- README.md | 38 ++++++------- docker-compose.yml | 16 +++--- src/StockSight/Helper/RequestRandomizer.py | 14 ++++- src/StockSight/Helper/Sentiment.py | 8 +-- src/StockSight/Initializer/ConfigReader.py | 9 +++ src/StockSight/Initializer/ElasticSearch.py | 8 +-- src/StockSight/Initializer/Logger.py | 24 ++++++-- src/StockSight/Initializer/Redis.py | 7 +-- src/StockSight/Model/Article.py | 9 +++ src/StockSight/NewsHeadlineListener.py | 6 +- src/StockSight/SeekAlphaListener.py | 12 ++++ src/StockSight/StockPriceListener.py | 22 ++++---- src/StockSight/TweetListener.py | 2 +- src/StockSight/YahooFinanceListener.py | 14 ++++- src/config.sample.yml | 20 ++++--- src/definitions.py | 11 ++++ src/kibana_export/export.7.3.ndjson | 14 ++--- src/news.sentiment.py | 29 
+--------- src/startup.sh | 18 +++--- src/stockprice.py | 55 +------------------ src/tweet.sentiment.py | 31 +---------- .../Helper/RequestRandomizerTest.py | 8 +++ tests/StockSight/SeekAlphaListenerTest.py | 13 ++++- tests/StockSight/StockPriceListenerTest.py | 10 +++- tests/StockSight/YahooFinanceListenerTest.py | 10 +++- 25 files changed, 205 insertions(+), 203 deletions(-) diff --git a/README.md b/README.md index bc6894f..3817172 100644 --- a/README.md +++ b/README.md @@ -6,22 +6,31 @@ Crowd-sourced stock analyzer and stock predictor using Elasticsearch, Twitter, N [![License](https://img.shields.io/github/license/shirosaidev/stocksight.svg?label=License&maxAge=86400)](./LICENSE) [![Release](https://img.shields.io/github/release/shirosaidev/stocksight.svg?label=Release&maxAge=60)](https://github.com/shirosaidev/stocksight/releases/latest) +### Authors +Original Author (Chris Park) +[![Sponsor Patreon](https://img.shields.io/badge/Sponsor%20%24-Patreon-brightgreen.svg)](https://www.patreon.com/shirosaidev) +[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=CLF223XAS4W72) + +Docker and new features author (Allen Jian Feng Xie) +[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/paypalme2/heyqule) + ## About stocksight is a crowd-sourced stock analysis open source software that uses Elasticsearch to store Twitter and news headlines data for stocks. stocksight analyzes the emotions of what the author writes and does sentiment analysis on the text to determine how the author "feels" about a stock. stocksight makes an aggregated analysis of all collected data from all sources. Each user running stocksight has a unique fingerprint: specific stocks they are following, news sites and twitter users they follow to find information for those stocks. 
This creates a unique sentiment analysis for each user, based on what data sources they are getting stocksight to search. Users can have the same stocks, but their data sources could vary significantly creating different sentiment analysis for the same stock. stocksight website will allow each user to see other sentiment analysis results from other stocksight user app results and a combined aggregated view of all. -### Download +### Requirement + +Install Docker on your system ```shell $ git clone https://github.com/heyqule/stocksight.git $ cd stocksight ``` -[Download latest version](https://github.com/shirosaidev/stocksight/releases/latest) ### How to setup - Copy src/config.yml to src/config.yml -- Change settings in config.yml to fit your need +- Change settings in config.yml to fit your needs - Change ElasticSearch credential (elasticuser:user) - Change analyzer ignore words (sentiment_analyzer:ignore_words) - If you want to run twitter analyzer, change the setting in twitter section @@ -34,26 +43,17 @@ $ cd stocksight - Profit ### How to use -The following action require to run in to the python3 container. +The following action require to run in the python3 container. ###### Delete Elastic Indexes -- Log into python docker console -- Run "python delindex.py --delindex {index_name}" +1. Log into python docker console +2. Run "python delindex.py --delindex {index_name}" ###### Update Kibana Dashboard Template -- make change to your existing template and visualizations. -- export them to kibana_export/export.7.3.ndjson -- replace symbol with "tmpl" or change the id and index value to match existing ndjson. 
-- run "KIBANA_OVERWRITE=true python import.kibana.py" - - -### Authors -Original Author (Chris Park) -[![Sponsor Patreon](https://img.shields.io/badge/Sponsor%20%24-Patreon-brightgreen.svg)](https://www.patreon.com/shirosaidev) -[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=CLF223XAS4W72) - -Docker and new features author (Allen Jian Feng Xie) -[![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/paypalme2/heyqule) +1. Make change to your existing template and visualizations. +2. Export them to kibana_export/export.7.3.ndjson +3. Replace symbol with "tmpl" or change the id and index value to match existing ndjson. +4. Run "KIBANA_OVERWRITE=true python import.kibana.py" ### Tech Stack - Python 3. (tested with Python 3.6.8 and 3.7.4) diff --git a/docker-compose.yml b/docker-compose.yml index 4070ab4..96c6f0c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,15 +24,15 @@ services: soft: 4096 hard: 4096 #expose this port for local dev only! - #ports: - # - "9200:9200" + ports: + - "9200:9200" restart: unless-stopped redis: build: context: ./redis-docker #expose this port for local dev only! 
- #ports: - # - "6379:6379" + ports: + - "6379:6379" restart: unless-stopped kibana: image: docker.elastic.co/kibana/kibana:7.3.1 @@ -45,10 +45,10 @@ services: build: context: ./python-docker environment: - #interval for getting stock price (in seconds) - - tick_time=900 - #interval for getting stock news (tick_time * news_cycle) - - news_cycle=4 + #interval for getting stock price in seconds + - stockprice_tick_time=120 + #interval for getting stock news in seconds + - news_sentiment_tick_time=3600 depends_on: - elasticsearch - redis diff --git a/src/StockSight/Helper/RequestRandomizer.py b/src/StockSight/Helper/RequestRandomizer.py index eaa17b8..9c41a20 100644 --- a/src/StockSight/Helper/RequestRandomizer.py +++ b/src/StockSight/Helper/RequestRandomizer.py @@ -1,7 +1,14 @@ -# implement random proxy +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +implement random proxy / random user agent -#https://github.com/hellysmile/fake-useragent implement random user-agent +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" +#https://github.com/hellysmile/fake-useragent implement random user-agent from fake_useragent import UserAgent import requests from random import randint @@ -10,6 +17,7 @@ class RequestRandomizer: + # It doesn't test validate the proxy server. That should be handled by the requests in the caller method @staticmethod def get_a_proxy(): ip_list = RequestRandomizer.get_raw_ip_list() @@ -39,7 +47,7 @@ def get_raw_ip_list(cls): return ip_list - + # Get a fake user agent. It may need some work to implement newer browsers. 
@staticmethod def get_a_user_agent(): ua = UserAgent() diff --git a/src/StockSight/Helper/Sentiment.py b/src/StockSight/Helper/Sentiment.py index 5621c18..1d8cfd0 100644 --- a/src/StockSight/Helper/Sentiment.py +++ b/src/StockSight/Helper/Sentiment.py @@ -1,11 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to -Elasticsearch. -See README.md or https://github.com/shirosaidev/stocksight -for more information. +""" +Sentiment Helper -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/src/StockSight/Initializer/ConfigReader.py b/src/StockSight/Initializer/ConfigReader.py index 4b382fb..bc35736 100644 --- a/src/StockSight/Initializer/ConfigReader.py +++ b/src/StockSight/Initializer/ConfigReader.py @@ -1,3 +1,12 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Global Config Reader + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import yaml from definitions import PROJECT_SRC_PATH diff --git a/src/StockSight/Initializer/ElasticSearch.py b/src/StockSight/Initializer/ElasticSearch.py index 5e04970..7938c83 100644 --- a/src/StockSight/Initializer/ElasticSearch.py +++ b/src/StockSight/Initializer/ElasticSearch.py @@ -1,11 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to -Elasticsearch. -See README.md or https://github.com/shirosaidev/stocksight -for more information. +""" +Global Elastic Search Handler -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. 
diff --git a/src/StockSight/Initializer/Logger.py b/src/StockSight/Initializer/Logger.py index a8c2554..6f895f0 100644 --- a/src/StockSight/Initializer/Logger.py +++ b/src/StockSight/Initializer/Logger.py @@ -1,18 +1,16 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to -Elasticsearch. -See README.md or https://github.com/shirosaidev/stocksight -for more information. +""" +Global Loggers -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. """ import logging - +from StockSight.Initializer.ConfigReader import config # set up logging logger = logging.getLogger('stocksight') @@ -36,3 +34,17 @@ logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' loglevel = logging.INFO logging.basicConfig(format=logformatter, level=loglevel) + + +if config['console_output_mode'] is 'verbose': + logger.setLevel(logging.INFO) + eslogger.setLevel(logging.INFO) + requestslogger.setLevel(logging.INFO) +elif config['console_output_mode'] is 'debug': + logger.setLevel(logging.DEBUG) + eslogger.setLevel(logging.DEBUG) + requestslogger.setLevel(logging.DEBUG) +elif config['console_output_mode'] is 'quiet': + logger.disabled = True + eslogger.disabled = True + requestslogger.disabled = True diff --git a/src/StockSight/Initializer/Redis.py b/src/StockSight/Initializer/Redis.py index 1211e9a..dd328e4 100644 --- a/src/StockSight/Initializer/Redis.py +++ b/src/StockSight/Initializer/Redis.py @@ -1,9 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to -Elasticsearch. -See README.md or https://github.com/shirosaidev/stocksight -for more information. +""" +Redis Global handler + Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. 
See diff --git a/src/StockSight/Model/Article.py b/src/StockSight/Model/Article.py index fd59b47..1e0152f 100644 --- a/src/StockSight/Model/Article.py +++ b/src/StockSight/Model/Article.py @@ -1,3 +1,12 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Article Data Holder + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import hashlib class Article: diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 1898678..40a69ae 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -5,7 +5,7 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. @@ -24,7 +24,6 @@ except ImportError: import urlparse -from StockSight.Initializer.ConfigReader import config from StockSight.Initializer.ElasticSearch import es from StockSight.Initializer.Redis import rds from StockSight.Helper.Sentiment import * @@ -139,8 +138,9 @@ def can_process(self, article): return article is not None and rds.exists(article.msg_id) is 0 def get_soup(self, url): + #try not to spam the server, but if you run with 100 stock symbols, it's probably going to spam it anyway lol. time.sleep(randint(1,3)) - req = requests.get(self.url) + req = requests.get(url) html = req.text soup = BeautifulSoup(html, 'html.parser') return soup diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index 782e80f..45f5a77 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -1,3 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""SeekAlphaListener.py - get headline sentiment from SeekingAlpha and add to +Elasticsearch. 
+See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" + from StockSight.NewsHeadlineListener import * import time from random import randint diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index d0ef18e..6a5ac6b 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -5,7 +5,7 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. @@ -14,7 +14,6 @@ import datetime import re import requests -import os from pytz import timezone from StockSight.Initializer.ConfigReader import * @@ -35,8 +34,7 @@ def get_price(self, symbol): if self.index_name is None: self.index_name = config['elasticsearch']['table_prefix']['price']+symbol.lower() - tick_time = int(int(os.getenv('tick_time', 900)) / 60) - url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=%sm&range=2d&corsDomain=finance.yahoo.com&.tsrc=finance" % tick_time + url = "https://query1.finance.yahoo.com/v8/finance/chart/SYMBOL?region=US&lang=en-US&includePrePost=false&interval=2m&range=2d&corsDomain=finance.yahoo.com&.tsrc=finance" current_timezone = timezone(config['stock_price']['timezone_str']) @@ -66,19 +64,19 @@ def get_price(self, symbol): if D['last'] is None: D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] D['date'] = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime()) # time now in gmt (utc) + try: D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] - - data['chart']['result'][0]['indicators']['quote'][0]['open'][-1]) / \ 
- data['chart']['result'][0]['indicators']['quote'][0]['open'][-1] * 100 + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] * 100 except TypeError: - if data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] is not None and \ - data['chart']['result'][0]['indicators']['quote'][0]['open'][-2] is not None: + if(data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] is not None and \ + data['chart']['result'][0]['indicators']['quote'][0]['open'][-3] is not None): D['change'] = (data['chart']['result'][0]['indicators']['quote'][0]['close'][-2] - - data['chart']['result'][0]['indicators']['quote'][0]['open'][-2]) / \ - data['chart']['result'][0]['indicators']['quote'][0]['open'][-2] * 100 + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3]) / \ + data['chart']['result'][0]['indicators']['quote'][0]['close'][-3] * 100 else: - D['change'] = 0 - + D['change'] = 0; pass D['high'] = data['chart']['result'][0]['indicators']['quote'][0]['high'][-1] diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index c438630..32efc3e 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -5,7 +5,7 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py index e492f47..43a697e 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -1,6 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""SeekAlphaListener.py - get headline sentiment from SeekingAlpha and add to +Elasticsearch. 
+See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Chris Park 2018-2019 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" from StockSight.NewsHeadlineListener import * -import time -from random import randint class YahooFinanceListener(NewsHeadlineListener): diff --git a/src/config.sample.yml b/src/config.sample.yml index 65d9910..94f4b66 100644 --- a/src/config.sample.yml +++ b/src/config.sample.yml @@ -1,8 +1,8 @@ elasticsearch: host : elasticsearch port : 9200 - user : your_cred - password : your_cred + user : elastic + password : changeme table_prefix: sentiment : stocksight_sentiment_ price : stocksight_price_ @@ -18,14 +18,16 @@ sentiment_analyzer: - giveaway - vs +console_output_mode: normal #normal, verbose, debug, quiet + news: - follow_link: false + follow_link: true twitter: - consumer_key : your_cred - consumer_secret : your_cred - access_token : your_cred - access_token_secret : your_cred + consumer_key : + consumer_secret : + access_token : + access_token_secret : min_followers : 1000 feeds : - '@elonmusk' @@ -39,8 +41,8 @@ twitter: - '@reutersbiz' stock_price: - time_check : true #True, fetch the price only within the below time range - weekday_start : 0 + time_check : false #True, fetch the price only within the below time range + weekday_start : 0 #0 is Monday, 6 is Sunday weekday_end : 4 hour_start : 9 hour_end : 17 diff --git a/src/definitions.py b/src/definitions.py index 9727287..7a8afe7 100644 --- a/src/definitions.py +++ b/src/definitions.py @@ -1,3 +1,14 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""stockprice.py - get stock price from Yahoo and add to +Elasticsearch. +See README.md or https://github.com/shirosaidev/stocksight +for more information. + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. 
+""" from pathlib import Path PROJECT_SRC_PATH = str(Path(__file__).parent) diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index 74c36ea..a7e1adb 100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,7 +1,7 @@ -{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"author\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"author.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"author\",\"subType\":\"multi\"},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDo
cValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_sentiment\",\"subType\":\"multi\"},{\"name\":\"msg_subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_close\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocVal
ues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_open\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\
":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"esTypes\":[\"integer\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-14T20:37:32.695Z","version":"Wzk2LDJd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"aggs\":[{\"enabled\":true,\"id\":\"1\",\"params\":{},\"schema\":\"metric\",\"type\":\"count\"},{\"enabled\":true,\"id\":\"2\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"enabled\":true,\"id\":\"3\",\"params\":{\"field\":\"polarity\",\"percents\":[50]},\"schema\":\"metric\",\"type\":\"median\"},{\"enabled\":true,\"id\":\"4\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"max\"},{\"enabled\":true,\"id\":\"5\",\"params\":{\"field\":\"polarity\"},\"schema\":\"metric\",\"type\":\"min\"}],\"params\":{\"addLegend\":false,\"addTooltip\":true,\"dimensions\":{\"metrics\":[{\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}},\"type\":\"vis_dimension\"}]},\"metric\":{\"colorSchema\":\"Green to 
Red\",\"colorsRange\":[{\"from\":0,\"to\":10000,\"type\":\"range\"}],\"invertColors\":false,\"labels\":{\"show\":true},\"metricColorMode\":\"None\",\"percentageMode\":false,\"style\":{\"bgColor\":false,\"bgFill\":\"#000\",\"fontSize\":60,\"labelColor\":false,\"subText\":\"\"},\"useRanges\":false},\"type\":\"metric\"},\"title\":\"tmpl - Polarity Dashboard\",\"type\":\"metric\"}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:44:45.235Z","version":"WzQwLDNd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article 
List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}}]}"},"id":"tmpl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:56:23.360Z","version":"WzQzLDNd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T03:11:30.053Z","version":"WzQ4LDNd"} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:47:11.395Z","version":"WzQxLDNd"} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-10T02:57:39.277Z","version":"WzQ1LDNd"} 
-{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":21,\"y\":0,\"w\":15,\"h\":14,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":21,\"y\":14,\"w\":27,\"h\":20,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":21,\"h\":14,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":36,\"y\":0,\"w\":12,\"h\":14,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"gridData\":{\"x\":0,\"y\":14,\"w\":21,\"h\":20,\"i\":\"5\"},\"version\":\"7.3.1\",\"panelIndex\":\"5\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"}]","timeRestore":false,"title":"tmpl - Dashboard","version":1},"id":"tmpl-dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl-polarity-dashboard","name":"panel_0","type":"visualization"},{"id":"tmpl-article-list","name":"panel_1","type":"visualization"},{"id":"tmpl-article-imports","name":"panel_2","type":"visualization"},{"id":"tmpl-sentiment","name":"panel_3","type":"visualization"},{"id":"tmpl-stock-price","name":"panel_4","type":"visualization"}],"type":"dashboard","updated_at":"2019-09-10T03:05:01.030Z","version":"WzQ2LDNd"} \ No newline at end of file 
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"author\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"author.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"author\",\"subType\":\"multi\"},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\
":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_sentiment\",\"subType\":\"multi\"},{\"name\":\"msg_subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_close\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_open\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable
\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"es
Types\":[\"integer\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4MCw4XQ=="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"polarity :*\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Polarity Dashboard\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"p
olarity\"}},{\"id\":\"6\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}},{\"id\":\"7\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}},{\"id\":\"8\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}}]}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:42:04.200Z","version":"WzQ4OCw4XQ=="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article 
List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}}]}"},"id":"tmpl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4Miw4XQ=="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"polarity 
:*\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:43:25.858Z","version":"WzQ4OSw4XQ=="} 
+{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4NCw4XQ=="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\",\"defaultYExtents\":true,\"boundsMargin\":0.1},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":{\"accessor\":0,\"format\":{\"id\":\"date\",\"params\":{\"pattern\":\"YYYY-MM-DD\"}},\"params\":{\"date\":true,\"interval\":\"P1D\",\"format\":\"YYYY-MM-DD\",\"bounds\":{\"min\":\"2019-06-04T04:23:34.512Z\",\"max\":\"2019-09-17T04:23:34.512Z\"}},\"aggType\":\"date_histogram\"},\"y\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"timeRange\":{\"from\":\"now-15w\",\"to\":\"now\"},\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}]
,"type":"visualization","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4NSw4XQ=="} +{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":21,\"y\":0,\"w\":15,\"h\":14,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":21,\"y\":14,\"w\":27,\"h\":20,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":21,\"h\":14,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":36,\"y\":0,\"w\":12,\"h\":14,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"gridData\":{\"x\":0,\"y\":14,\"w\":21,\"h\":20,\"i\":\"5\"},\"version\":\"7.3.1\",\"panelIndex\":\"5\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"}]","timeRestore":false,"title":"tmpl - Dashboard","version":1},"id":"tmpl-dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl-polarity-dashboard","name":"panel_0","type":"visualization"},{"id":"tmpl-article-list","name":"panel_1","type":"visualization"},{"id":"tmpl-article-imports","name":"panel_2","type":"visualization"},{"id":"tmpl-sentiment","name":"panel_3","type":"visualization"},{"id":"tmpl-stock-price","name":"panel_4","type":"visualization"}],"type":"dashboard","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4Niw4XQ=="} \ No newline at end of file diff --git a/src/news.sentiment.py b/src/news.sentiment.py index 9fc626d..a4d4462 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -11,11 +11,9 @@ LICENSE for the full license text. 
""" -import argparse + import sys import threading -import time -from random import randint from StockSight.YahooFinanceListener import * from StockSight.SeekAlphaListener import * @@ -27,31 +25,6 @@ if __name__ == '__main__': - # parse cli args - parser = argparse.ArgumentParser() - parser.add_argument("-v", "--verbose", action="store_true", - help="Increase output verbosity") - parser.add_argument("--debug", action="store_true", - help="Debug message output") - parser.add_argument("-q", "--quiet", action="store_true", - help="Run quiet with no message output") - parser.add_argument("-V", "--version", action="version", - version="stocksight v%s" % STOCKSIGHT_VERSION, - help="Prints version and exits") - args = parser.parse_args() - - if args.verbose: - logger.setLevel(logging.INFO) - eslogger.setLevel(logging.INFO) - requestslogger.setLevel(logging.INFO) - if args.debug: - logger.setLevel(logging.DEBUG) - eslogger.setLevel(logging.DEBUG) - requestslogger.setLevel(logging.DEBUG) - if args.quiet: - logger.disabled = True - eslogger.disabled = True - requestslogger.disabled = True try: for symbol in config['symbols']: diff --git a/src/startup.sh b/src/startup.sh index 0e310cd..00da000 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -55,26 +55,30 @@ echo "Copy kibana dashboard if they don't exist"; python import.kibana.py & tick=0 -let sentiment_time=$tick_time*$news_cycle +stockprice_tick_time=${stockprice_tick_time:-120} +news_sentiment_tick_time=${news_sentiment_tick_time:-3600} +news_cycle=$(($news_sentiment_tick_time/$stockprice_tick_time)) +news_cycle=${news_cycle%%.*} -#echo "Spawning Tweet Sentiment receiver instance"; -#python tweet.sentiment.py & +echo "Spawning Tweet Sentiment receiver instance"; +python tweet.sentiment.py & while true do echo "Spawning stock price receiver instance"; python stockprice.py & - echo "Will get stock data again in ${tick_time} sec..."; - let tick_mod=tick%$news_cycle + echo "Will get stock data again in 
${stockprice_tick_time} sec..."; + + tick_mod=$((tick%$news_cycle)) if [ $tick_mod -eq 0 ] then echo "Spawning News Headline Sentiment receiver instance"; python news.sentiment.py & - echo "Will get sentiment data again in ${sentiment_time} sec..."; + echo "Will get sentiment data again in ${news_sentiment_tick_time} sec..."; let tick=0; fi - sleep $tick_time + sleep $stockprice_tick_time let tick++ done diff --git a/src/stockprice.py b/src/stockprice.py index 782c4de..e404806 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -5,7 +5,7 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. @@ -17,8 +17,8 @@ import time from random import randint -from StockSight.Initializer.ConfigReader import * from StockSight.Initializer.ElasticSearch import es +from StockSight.Initializer.Logger import * from StockSight.EsMap.StockPrice import mapping from StockSight.StockPriceListener import StockPriceListener @@ -29,55 +29,6 @@ if __name__ == '__main__': - # parse cli args - parser = argparse.ArgumentParser() - - parser.add_argument("-v", "--verbose", action="store_true", - help="Increase output verbosity") - parser.add_argument("--debug", action="store_true", - help="Debug message output") - parser.add_argument("-q", "--quiet", action="store_true", - help="Run quiet with no message output") - parser.add_argument("-V", "--version", action="version", - version="stocksight v%s" % STOCKSIGHT_VERSION, - help="Prints version and exits") - args = parser.parse_args() - - # set up logging - logger = logging.getLogger('stocksight') - logger.setLevel(logging.INFO) - eslogger = logging.getLogger('elasticsearch') - eslogger.setLevel(logging.WARNING) - requestslogger = logging.getLogger('requests') - requestslogger.setLevel(logging.WARNING) - 
logging.addLevelName( - logging.INFO, "\033[1;32m%s\033[1;0m" - % logging.getLevelName(logging.INFO)) - logging.addLevelName( - logging.WARNING, "\033[1;31m%s\033[1;0m" - % logging.getLevelName(logging.WARNING)) - logging.addLevelName( - logging.ERROR, "\033[1;41m%s\033[1;0m" - % logging.getLevelName(logging.ERROR)) - logging.addLevelName( - logging.DEBUG, "\033[1;33m%s\033[1;0m" - % logging.getLevelName(logging.DEBUG)) - logformatter = '%(asctime)s [%(levelname)s][%(name)s] %(message)s' - loglevel = logging.INFO - logging.basicConfig(format=logformatter, level=loglevel) - if args.verbose: - logger.setLevel(logging.INFO) - eslogger.setLevel(logging.INFO) - requestslogger.setLevel(logging.INFO) - if args.debug: - logger.setLevel(logging.DEBUG) - eslogger.setLevel(logging.DEBUG) - requestslogger.setLevel(logging.DEBUG) - if args.quiet: - logger.disabled = True - eslogger.disabled = True - requestslogger.disabled = True - try: for symbol in config['symbols']: try: @@ -90,7 +41,7 @@ priceThread = threading.Thread(target=stockprice.get_price,args=(symbol,)) priceThread.start() - time.sleep(randint(5,15)) + time.sleep(randint(2,5)) except Exception as e: logger.warning("%s" % e) diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py index ef683d5..4be0297 100644 --- a/src/tweet.sentiment.py +++ b/src/tweet.sentiment.py @@ -5,7 +5,7 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. -Copyright (C) Chris Park 2018 +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. 
@@ -15,7 +15,6 @@ import sys from random import randint -from StockSight.Initializer.ConfigReader import * from StockSight.TweetListener import * from StockSight.EsMap.Sentiment import * from tweepy import API, Stream, OAuthHandler, TweepError @@ -26,31 +25,7 @@ if __name__ == '__main__': - # parse cli args - parser = argparse.ArgumentParser() - parser.add_argument("-v", "--verbose", action="store_true", - help="Increase output verbosity") - parser.add_argument("--debug", action="store_true", - help="Debug message output") - parser.add_argument("-q", "--quiet", action="store_true", - help="Run quiet with no message output") - parser.add_argument("-V", "--version", action="version", - version="stocksight v%s" % STOCKSIGHT_VERSION, - help="Prints version and exits") - args = parser.parse_args() - - if args.verbose: - logger.setLevel(logging.INFO) - eslogger.setLevel(logging.INFO) - requestslogger.setLevel(logging.INFO) - if args.debug: - logger.setLevel(logging.DEBUG) - eslogger.setLevel(logging.DEBUG) - requestslogger.setLevel(logging.DEBUG) - if args.quiet: - logger.disabled = True - eslogger.disabled = True - requestslogger.disabled = True + consumer_key = config['twitter']['consumer_key'] consumer_secret = config['twitter']['consumer_secret'] @@ -62,7 +37,7 @@ not consumer_secret or \ not access_token or \ not access_token_secret: - logger.warning("Invalid Twitter API cred") + logger.error("Invalid Twitter API cred") sys.exit(1) try: diff --git a/tests/StockSight/Helper/RequestRandomizerTest.py b/tests/StockSight/Helper/RequestRandomizerTest.py index 190d381..5d54d14 100644 --- a/tests/StockSight/Helper/RequestRandomizerTest.py +++ b/tests/StockSight/Helper/RequestRandomizerTest.py @@ -1,3 +1,11 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" Request Randomizer Test + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. 
+""" import unittest from StockSight.Helper.RequestRandomizer import RequestRandomizer from StockSight.Initializer.ConfigReader import config diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 52d7112..14f8092 100644 --- a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -1,3 +1,12 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" Seeking Alpha Listener Tests + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" + import unittest from StockSight.SeekAlphaListener import * from StockSight.EsMap.Sentiment import mapping @@ -33,7 +42,7 @@ def test_get_news_headlines(self): self.assertIsNotNone(headlines[0].url, "URL is empty") self.assertIsNotNone(headlines[0].referer_url, "Refer URL is empty") - #always fails b/c of 403. + #always fails b/c of 403 requests. def test_get_news_headlines_with_body(self): config['news']['follow_link'] = True headlines = self.mainClass.get_news_headlines() @@ -54,7 +63,7 @@ def test_get_news_headlines_with_body(self): def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() - time.sleep(1) + time.sleep(5) logs = es.search(index=self.index_name,body="{}") message = logs['hits']['hits'][0]['_source'] self.assertIsNotNone(message['title'], "Title is empty") diff --git a/tests/StockSight/StockPriceListenerTest.py b/tests/StockSight/StockPriceListenerTest.py index 83caf53..65f3cb1 100644 --- a/tests/StockSight/StockPriceListenerTest.py +++ b/tests/StockSight/StockPriceListenerTest.py @@ -1,5 +1,13 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" Stock Price listener test + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. 
+""" + import unittest -import time from StockSight.StockPriceListener import * from StockSight.EsMap.StockPrice import mapping diff --git a/tests/StockSight/YahooFinanceListenerTest.py b/tests/StockSight/YahooFinanceListenerTest.py index 08e94dc..154d864 100644 --- a/tests/StockSight/YahooFinanceListenerTest.py +++ b/tests/StockSight/YahooFinanceListenerTest.py @@ -1,3 +1,11 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Yahoo Finance News Test + +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" import unittest from StockSight.YahooFinanceListener import * from StockSight.EsMap.Sentiment import mapping @@ -49,7 +57,7 @@ def test_get_news_headlines_with_body(self): def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() - time.sleep(1) + time.sleep(5) logs = es.search(index=self.index_name,body="{}") message = logs['hits']['hits'][0]['_source'] self.assertIsNotNone(message['title'], "Title is empty") From fb6bea144a42cb4a074796c0f34cecc2f20e4ddc Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 22 Sep 2019 16:29:50 -0400 Subject: [PATCH 48/55] - Improve Kibana Dashboard - Add copyright blocks - FIx Change Log - Move sleep timer values to config.yml --- .github/FUNDING.yml | 1 + CHANGELOG.md | 24 ++++++ README.md | 86 ++++++++++++++----- docker-compose.yml | 1 - src/StockSight/EsMap/Sentiment.py | 10 +++ src/StockSight/EsMap/StockPrice.py | 11 +++ src/StockSight/Helper/RequestRandomizer.py | 1 + src/StockSight/Initializer/ConfigReader.py | 1 + src/StockSight/Model/Article.py | 1 + src/StockSight/NewsHeadlineListener.py | 5 +- src/StockSight/SeekAlphaListener.py | 16 ++-- src/StockSight/YahooFinanceListener.py | 8 +- src/config.sample.yml | 19 +++- src/definitions.py | 1 + src/delindex.py | 3 + src/import.kibana.py | 6 +- src/kibana_export/export.7.3.ndjson | 14 +-- src/news.sentiment.py | 16 ++-- src/stockprice.py | 11 ++- 
src/tweet.sentiment.py | 2 +- .../v0.1_deprecated/README.og.md | 0 .../config.sample.py | 0 .../sentiment.og.py | 0 .../stockprice.og.py | 0 .../Helper/RequestRandomizerTest.py | 1 + tests/StockSight/SeekAlphaListenerTest.py | 3 +- tests/StockSight/StockPriceListenerTest.py | 1 + tests/StockSight/YahooFinanceListenerTest.py | 3 +- 28 files changed, 179 insertions(+), 66 deletions(-) rename README.og.md => src/v0.1_deprecated/README.og.md (100%) rename src/{Original => v0.1_deprecated}/config.sample.py (100%) rename src/{Original => v0.1_deprecated}/sentiment.og.py (100%) rename src/{Original => v0.1_deprecated}/stockprice.og.py (100%) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 156fb2a..af0afdd 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1,3 @@ patreon: shirosaidev custom: https://www.paypal.me/shirosaidev +custom: https://www.paypal.me/heyqule \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e001b5..542612f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ # stocksight Change Log +## [0.2] = 2019-09-22 +### major changes +- Dockerized the system. CLI scripts are retired. + - All settings are fetched from src/config.yml. +- Replaced ElasticSearch 5.6 with ElasticSearch 7.3. +- Added Redis for caching. +- Automated requirements installation and kibana dashboard setup. +- Converted original scripts to modules and classes to simplify the process of building new extensions +- Data mapping have changed. + - Each Symbol has it's own set of index. One for sentiment and one for price. + - See src/Stocksight/EsMap for mapping details +- Sentiment and price crawlers are spawned concurrently based on your specified stock symbols. +- Improved memory footprint by spawning python instances when it's needed. + +### added +- Added SeekingAlpha crawler +- Added integration test cases +- Added support for generating random proxy and random user-agent. + - may not be useful for sophisticated blockers. 
+ +### issues: +- SeekingAlpha blocks frequent accesses with 403. Follow_link is disabled for it. + + ## [0.1-b.6] = 2019-07-15 ### fixed - "TypeError: sequence item 0: expected str instance, int found" traceback error when running with -f twitteruserids.txt diff --git a/README.md b/README.md index 3817172..1283bab 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,11 @@ Crowd-sourced stock analyzer and stock predictor using Elasticsearch, Twitter, N [![Release](https://img.shields.io/github/release/shirosaidev/stocksight.svg?label=Release&maxAge=60)](https://github.com/shirosaidev/stocksight/releases/latest) ### Authors -Original Author (Chris Park) +Chris Park [![Sponsor Patreon](https://img.shields.io/badge/Sponsor%20%24-Patreon-brightgreen.svg)](https://www.patreon.com/shirosaidev) [![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=CLF223XAS4W72) -Docker and new features author (Allen Jian Feng Xie) +Allen Jian Feng Xie [![Donate PayPal](https://img.shields.io/badge/Donate%20%24-PayPal-brightgreen.svg)](https://www.paypal.com/paypalme2/heyqule) ## About @@ -19,25 +19,42 @@ stocksight is a crowd-sourced stock analysis open source software that uses Elas Each user running stocksight has a unique fingerprint: specific stocks they are following, news sites and twitter users they follow to find information for those stocks. This creates a unique sentiment analysis for each user, based on what data sources they are getting stocksight to search. Users can have the same stocks, but their data sources could vary significantly creating different sentiment analysis for the same stock. stocksight website will allow each user to see other sentiment analysis results from other stocksight user app results and a combined aggregated view of all. +stocksight diagram + +### Upgrade From 0.1 +Version 0.2 went through an architectural revamp. 
You will have to COPY the v0.1 data from Elastic 5.6 to Elastic 7.3 if you wish to retain your previous data. + +The ElasticSearch index mappings are also different between the two versions. Please see "src/StockSight/EsMap" files for details. + +Differences: +1. Each symbol has its own set of price and sentiment indexes. +2. Each symbol has its own dashboard in Kibana. +3. Each sentiment record has a sentiment value for its title and a separate sentiment value for its message. + - Title sentiment and message sentiment are no longer mixed together. +4. Stock Price open and close values are also saved in price index. + + ### Requirement Install Docker on your system ```shell -$ git clone https://github.com/heyqule/stocksight.git +$ git clone https://github.com/shirosaidev/stocksight.git $ cd stocksight ``` ### How to setup - Copy src/config.yml to src/config.yml - Change settings in config.yml to fit your needs - - Change ElasticSearch credential (elasticuser:user) - - Change analyzer ignore words (sentiment_analyzer:ignore_words) - - If you want to run twitter analyzer, change the setting in twitter section - - Uncomment ""#python tweet.sentiment.py &" in src/startup.sh - - Add desired stock symbol and require words to symbols section (symbol: tsla) -- Change run interval in docker-composer.yml - - default, 900 seconds for stock price, 3600 seconds for news crawler + - Change ElasticSearch credential if needed + - Change NLTK analyzer ignore words (see sentiment_analyzer:ignore_words:) + - Add twitter credential and change the twitter feed + - Create a new twitter application and generate your consumer key and access token.
+ - https://developer.twitter.com/en/docs/basics/developer-portal/guides/apps.html + - https://developer.twitter.com/en/docs/basics/authentication/guides/access-tokens.html + - Add desired stock symbol and require words to symbols section (see symbol: tsla) +- Change execution intervals in docker-compose.yml + - default, 120 seconds for stock price, 3600 seconds for news sentiment listeners - Run "docker-compose up" - ??? - Profit @@ -45,9 +62,25 @@ $ cd stocksight ### How to use The following action require to run in the python3 container. -###### Delete Elastic Indexes -1. Log into python docker console -2. Run "python delindex.py --delindex {index_name}" +###### View Kibana Dashboard +http://localhost:5601 + +###### Adding / Changing Stock Symbols +1. open src/config.yml +2. add stock symbol to symbol section. +3. add required keyword of the symbol. +4. the sentiment and price listeners will pick up the change on their next run. + +###### Change Twitter Settings When the Instance Is Running. +1. Update the config.yml +2. Log into python container +3. kill tweet.sentiment.py +4. rerun it with "python tweet.sentiment.py &" + +###### Adding a new news sentiment listener +1. See SeekAlphaListener and YahooFinanceListener as example. +2. Add your class to news.sentiment.py +3. the sentiment runner will pick up the new listener on its next run. ###### Update Kibana Dashboard Template 1. Make change to your existing template and visualizations. @@ -55,17 +88,24 @@ The following action require to run in the python3 container. 3. Replace symbol with "tmpl" or change the id and index value to match existing ndjson. 4. Run "KIBANA_OVERWRITE=true python import.kibana.py" +###### Delete Elastic Indexes +1. Log into python docker console +2. Run "python delindex.py --delindex {index_name}" + ### Tech Stack - Python 3. (tested with Python 3.6.8 and 3.7.4) - Elasticsearch 7.3.1. - Kibana 7.3.1.
-- elasticsearch python module -- nltk python module -- requests python module -- tweepy python module -- beautifulsoup4 python module -- textblob python module -- vaderSentiment python module -- pytz -- redis -- pyyaml +- Redis 5 +- Python module + - elasticsearch + - nltk + - requests + - tweepy + - beautifulsoup4 + - textblob + - vaderSentiment + - pytz + - redis + - pyyaml + - fake-useragent diff --git a/docker-compose.yml b/docker-compose.yml index 96c6f0c..6f710b7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,4 @@ # ./docker-compose.yml - version: '3' services: diff --git a/src/StockSight/EsMap/Sentiment.py b/src/StockSight/EsMap/Sentiment.py index 1932922..f8d1099 100644 --- a/src/StockSight/EsMap/Sentiment.py +++ b/src/StockSight/EsMap/Sentiment.py @@ -1,3 +1,13 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Sentiment Mapping + +Copyright (C) Chris Park 2018-2019 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. +""" # set up elasticsearch mappings and create index mapping = { "mappings": { diff --git a/src/StockSight/EsMap/StockPrice.py b/src/StockSight/EsMap/StockPrice.py index 033c65c..e01d5fb 100644 --- a/src/StockSight/EsMap/StockPrice.py +++ b/src/StockSight/EsMap/StockPrice.py @@ -1,3 +1,14 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Stock Price Mapping + +Copyright (C) Chris Park 2018-2019 +Copyright (C) Allen (Jian Feng) Xie 2019 +stocksight is released under the Apache 2.0 license. See +LICENSE for the full license text. 
+""" + # set up elasticsearch mappings and create index mapping = { "mappings": { diff --git a/src/StockSight/Helper/RequestRandomizer.py b/src/StockSight/Helper/RequestRandomizer.py index 9c41a20..d0f40d3 100644 --- a/src/StockSight/Helper/RequestRandomizer.py +++ b/src/StockSight/Helper/RequestRandomizer.py @@ -3,6 +3,7 @@ """ implement random proxy / random user agent +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/src/StockSight/Initializer/ConfigReader.py b/src/StockSight/Initializer/ConfigReader.py index bc35736..6a20e67 100644 --- a/src/StockSight/Initializer/ConfigReader.py +++ b/src/StockSight/Initializer/ConfigReader.py @@ -3,6 +3,7 @@ """ Global Config Reader +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/src/StockSight/Model/Article.py b/src/StockSight/Model/Article.py index 1e0152f..9bef4cc 100644 --- a/src/StockSight/Model/Article.py +++ b/src/StockSight/Model/Article.py @@ -3,6 +3,7 @@ """ Article Data Holder +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 40a69ae..7615e83 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to -Elasticsearch. +"""NewsHeadlineListener.py - Base class for new sentiment listener See README.md or https://github.com/shirosaidev/stocksight for more information. 
@@ -139,7 +138,7 @@ def can_process(self, article): def get_soup(self, url): #try not to spam the server, but if you run with 100 stock symbols, it's probably going to spam it anyway lol. - time.sleep(randint(1,3)) + time.sleep(randint(config['spawn_intervals']['request_min'], config['spawn_intervals']['request_max'])) req = requests.get(url) html = req.text soup = BeautifulSoup(html, 'html.parser') diff --git a/src/StockSight/SeekAlphaListener.py b/src/StockSight/SeekAlphaListener.py index 45f5a77..4255106 100644 --- a/src/StockSight/SeekAlphaListener.py +++ b/src/StockSight/SeekAlphaListener.py @@ -5,14 +5,16 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. +ISSUE: +SeekingAlpha block frequent access with 403. Follow_link disabled. + +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. """ from StockSight.NewsHeadlineListener import * -import time -from random import randint class SeekAlphaListener(NewsHeadlineListener): def __init__(self, symbol): @@ -38,7 +40,7 @@ def get_news_headlines(self): # if config['news']['follow_link']: # body_url = article.url - # for p in self.get_analysis_summary(body_url, 'p.bullets_li'): + # for p in self.get_page_text(body_url, 'p.bullets_li'): # article.body += str(p)+" " article.referer_url = self.url @@ -51,7 +53,7 @@ def get_news_headlines(self): # if config['news']['follow_link']: # body_url = article.url - # for p in self.get_summary(body_url, 'div.a-sum p'): + # for p in self.get_page_text(body_url, 'div.a-sum p'): # article.body += str(p)+" " article.referer_url = self.url @@ -63,10 +65,7 @@ def get_news_headlines(self): return articles - def get_page_text(self, url): - pass - - def get_summary(self, url, selector): + def get_page_text(self, url, selector): try: soup = self.get_soup(url) html_p = soup.select(selector) @@ -81,3 +80,4 @@ def get_summary(self, url, selector): 
except requests.exceptions.RequestException as exce: logger.warning("Exception: can't crawl web site (%s)" % exce) pass + diff --git a/src/StockSight/YahooFinanceListener.py b/src/StockSight/YahooFinanceListener.py index 43a697e..aee9e65 100644 --- a/src/StockSight/YahooFinanceListener.py +++ b/src/StockSight/YahooFinanceListener.py @@ -35,7 +35,7 @@ def get_news_headlines(self): if self.can_process(article): if config['news']['follow_link']: body_url = article.url - for p in self.get_page_text(body_url): + for p in self.get_page_text(body_url, 'p'): article.body += str(p)+" " article.referer_url = self.url @@ -47,11 +47,11 @@ def get_news_headlines(self): return articles - def get_page_text(self, url): - max_paragraphs = 5 + def get_page_text(self, url, selector): + max_paragraphs = 10 try: soup = self.get_soup(url) - html_p = soup.findAll('p') + html_p = soup.findAll(selector) if html_p: n = 1 diff --git a/src/config.sample.yml b/src/config.sample.yml index 94f4b66..e80cfc0 100644 --- a/src/config.sample.yml +++ b/src/config.sample.yml @@ -17,6 +17,7 @@ sentiment_analyzer: - win - giveaway - vs + - vs. console_output_mode: normal #normal, verbose, debug, quiet @@ -28,7 +29,7 @@ twitter: consumer_secret : access_token : access_token_secret : - min_followers : 1000 + min_followers : 1000 #Min number of followers is required to process the tweet. feeds : - '@elonmusk' - '@stockwits' @@ -46,7 +47,21 @@ stock_price: weekday_end : 4 hour_start : 9 hour_end : 17 - timezone_str : America/Toronto + timezone_str : America/New_York + + +# Thread / Request timer to limit spamming the server. +# but if you have tons of symbols, it's probably not going to matter much. 
+spawn_intervals: + #Stock price listener spawn timer + stockprice_min: 2 + stockprice_max: 5 + #News Sentiment listen spawn timer + news_min: 5 + news_max: 10 + #Request timer + request_min: 1 + request_max: 3 symbols: tsla: diff --git a/src/definitions.py b/src/definitions.py index 7a8afe7..0515936 100644 --- a/src/definitions.py +++ b/src/definitions.py @@ -5,6 +5,7 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/src/delindex.py b/src/delindex.py index d5ad4e7..ec8b2cc 100644 --- a/src/delindex.py +++ b/src/delindex.py @@ -15,6 +15,9 @@ from StockSight.Initializer.ElasticSearch import es from StockSight.Initializer.Logger import * +STOCKSIGHT_VERSION = '0.2' +__version__ = STOCKSIGHT_VERSION + if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/src/import.kibana.py b/src/import.kibana.py index e8b649a..a667e74 100644 --- a/src/import.kibana.py +++ b/src/import.kibana.py @@ -5,6 +5,7 @@ See README.md or https://github.com/shirosaidev/stocksight for more information. +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. 
@@ -14,6 +15,9 @@ import os from StockSight.Initializer.ConfigReader import * +STOCKSIGHT_VERSION = '0.2' +__version__ = STOCKSIGHT_VERSION + if __name__ == '__main__': try: @@ -24,7 +28,7 @@ for symbol in config['symbols']: try: print("Starting %s Kibana Dashboard Import" % symbol) - ndjson_file_path = 'kibana_export/'+symbol+'_exports.ndjson' + ndjson_file_path = 'kibana_export/tmp/'+symbol+'_exports.ndjson' ndjson_file = open(ndjson_file_path, "wt", encoding='utf-8') final_text = import_template.replace('tmpl', symbol) ndjson_file.write(final_text) diff --git a/src/kibana_export/export.7.3.ndjson b/src/kibana_export/export.7.3.ndjson index a7e1adb..24b0cd1 100644 --- a/src/kibana_export/export.7.3.ndjson +++ b/src/kibana_export/export.7.3.ndjson @@ -1,7 +1,7 @@ -{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"author\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"author.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"author\",\"subType\":\"multi\
"},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_sentiment\",\"subType\":\"multi\"},{\"name\":\"msg_subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price
_close\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_open\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"title\",\"type\":
\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"esTypes\":[\"integer\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4MCw4XQ=="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"polarity :*\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Polarity Dashboard\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to 
Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"6\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}},{\"id\":\"7\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}},{\"id\":\"8\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}}]}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:42:04.200Z","version":"WzQ4OCw4XQ=="} 
-{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}}]}"},"id":"tm
pl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4Miw4XQ=="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"polarity :*\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":null,\"y\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migra
tionVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:43:25.858Z","version":"WzQ4OSw4XQ=="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4NCw4XQ=="} -{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock 
Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\",\"defaultYExtents\":true,\"boundsMargin\":0.1},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":{\"accessor\":0,\"format\":{\"id\":\"date\",\"params\":{\"pattern\":\"YYYY-MM-DD\"}},\"params\":{\"date\":true,\"interval\":\"P1D\",\"format\":\"YYYY-MM-DD\",\"bounds\":{\"min\":\"2019-06-04T04:23:34.512Z\",\"max\":\"2019-09-17T04:23:34.512Z\"}},\"aggType\":\"date_histogram\"},\"y\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"timeRange\":{\"from\":\"now-15w\",\"to\":\"now\"},\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","n
ame":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4NSw4XQ=="} -{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[]}"},"optionsJSON":"{\"useMargins\":true,\"hidePanelTitles\":false}","panelsJSON":"[{\"gridData\":{\"x\":21,\"y\":0,\"w\":15,\"h\":14,\"i\":\"1\"},\"version\":\"7.3.1\",\"panelIndex\":\"1\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"gridData\":{\"x\":21,\"y\":14,\"w\":27,\"h\":20,\"i\":\"2\"},\"version\":\"7.3.1\",\"panelIndex\":\"2\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"gridData\":{\"x\":0,\"y\":0,\"w\":21,\"h\":14,\"i\":\"3\"},\"version\":\"7.3.1\",\"panelIndex\":\"3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"gridData\":{\"x\":36,\"y\":0,\"w\":12,\"h\":14,\"i\":\"4\"},\"version\":\"7.3.1\",\"panelIndex\":\"4\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"gridData\":{\"x\":0,\"y\":14,\"w\":21,\"h\":20,\"i\":\"5\"},\"version\":\"7.3.1\",\"panelIndex\":\"5\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"}]","timeRestore":false,"title":"tmpl - Dashboard","version":1},"id":"tmpl-dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl-polarity-dashboard","name":"panel_0","type":"visualization"},{"id":"tmpl-article-list","name":"panel_1","type":"visualization"},{"id":"tmpl-article-imports","name":"panel_2","type":"visualization"},{"id":"tmpl-sentiment","name":"panel_3","type":"visualization"},{"id":"tmpl-stock-price","name":"panel_4","type":"visualization"}],"type":"dashboard","updated_at":"2019-09-17T04:37:55.844Z","version":"WzQ4Niw4XQ=="} \ No newline at end of file 
+{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"author\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"author.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"author\",\"subType\":\"multi\"},{\"name\":\"change\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"location\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"location.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"location\",\"subType\":\"multi\"},{\"name\":\"message\",\"type\":\"string\",\"esTypes\
":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_id\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"msg_sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"msg_sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"msg_sentiment\",\"subType\":\"multi\"},{\"name\":\"msg_subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"polarity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_close\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_high\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_last\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_low\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"price_open\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable
\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"referer_url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"referer_url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"referer_url\",\"subType\":\"multi\"},{\"name\":\"sentiment\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"sentiment.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"sentiment\",\"subType\":\"multi\"},{\"name\":\"subjectivity\",\"type\":\"number\",\"esTypes\":[\"float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"symbol\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"title\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"title.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"title\",\"subType\":\"multi\"},{\"name\":\"url\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"url.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"parent\":\"url\",\"subType\":\"multi\"},{\"name\":\"vol\",\"type\":\"number\",\"es
Types\":[\"integer\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"date","title":"stocksight_*_tmpl"},"id":"tmpl-index","migrationVersion":{"index-pattern":"6.5.0"},"references":[],"type":"index-pattern","updated_at":"2019-09-18T06:29:35.661Z","version":"WzY1MywxMl0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"polarity :*\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Polarity Dashboard","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Polarity Dashboard\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":2,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":3,\"format\":{\"id\":\"number\",\"params\":{}}},{\"type\":\"vis_dimension\",\"accessor\":4,\"format\":{\"id\":\"number\",\"params\":{}}}]},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"p
olarity\"}},{\"id\":\"6\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}},{\"id\":\"7\",\"enabled\":true,\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}},{\"id\":\"8\",\"enabled\":true,\"type\":\"min\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\"}}]}"},"id":"tmpl-polarity-dashboard","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-18T06:29:35.661Z","version":"WzY1NCwxMl0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article List","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"tmpl - Article 
List\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"dimensions\":{\"metrics\":[{\"accessor\":2,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":3,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"},{\"accessor\":4,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}],\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"},{\"accessor\":1,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"polarity\",\"customLabel\":\"Polarity\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"subjectivity\",\"customLabel\":\"Subjectivity\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"title.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":100,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Title\"}},{\"id\":\"4\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"url.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":50,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"URL\"}},{\"id\":\"5\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_polarity\",\"customLabel\":\"Msg 
Polarity\"}},{\"id\":\"6\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"msg_subjectivity\",\"customLabel\":\"Msg Subjectivity\"}}]}"},"id":"tmpl-article-list","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-18T06:29:35.661Z","version":"WzY1NSwxMl0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"polarity :*\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Article Imports","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Article Imports\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":{\"accessor\":0,\"format\":{\"id\":\"date\",\"params\":{\"pattern\":\"YYYY-MM-DD HH:mm\"}},\"params\":{\"date\":true,\"interval\":\"PT3H\",\"format\":\"YYYY-MM-DD 
HH:mm\",\"bounds\":{\"min\":\"2019-09-11T06:31:27.422Z\",\"max\":\"2019-09-18T06:31:27.423Z\"}},\"aggType\":\"date_histogram\"},\"y\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"timeRange\":{\"from\":\"now-7d\",\"to\":\"now\"},\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{}}}]}"},"id":"tmpl-article-imports","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-18T06:31:42.786Z","version":"WzY2MywxMl0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Sentiment","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - 
Sentiment\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"isDonut\":true,\"labels\":{\"show\":true,\"values\":true,\"last_level\":true,\"truncate\":100},\"dimensions\":{\"metric\":{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"count\"},\"buckets\":[{\"accessor\":0,\"format\":{\"id\":\"terms\",\"params\":{\"id\":\"string\",\"otherBucketLabel\":\"Other\",\"missingBucketLabel\":\"Missing\"}},\"params\":{},\"aggType\":\"terms\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"sentiment.keyword\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":5,\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"tmpl-sentiment","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-18T06:31:58.886Z","version":"WzY2NCwxMl0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"kuery\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"tmpl - Stock Price","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"tmpl - Stock 
Price\",\"type\":\"line\",\"params\":{\"type\":\"line\",\"grid\":{\"categoryLines\":false},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"filter\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\",\"defaultYExtents\":true,\"boundsMargin\":0.1},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Price\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"line\",\"mode\":\"normal\",\"data\":{\"label\":\"Price\",\"id\":\"1\"},\"valueAxis\":\"ValueAxis-1\",\"drawLinesBetweenPoints\":true,\"showCircles\":true}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"top\",\"times\":[],\"addTimeMarker\":false,\"dimensions\":{\"x\":{\"accessor\":0,\"format\":{\"id\":\"date\",\"params\":{\"pattern\":\"YYYY-MM-DD HH:mm\"}},\"params\":{\"date\":true,\"interval\":\"PT3H\",\"format\":\"YYYY-MM-DD 
HH:mm\",\"bounds\":{\"min\":\"2019-09-11T06:32:05.458Z\",\"max\":\"2019-09-18T06:32:05.458Z\"}},\"aggType\":\"date_histogram\"},\"y\":[{\"accessor\":1,\"format\":{\"id\":\"number\"},\"params\":{},\"aggType\":\"avg\"}]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"price_last\",\"customLabel\":\"Price\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"date\",\"timeRange\":{\"from\":\"now-7d\",\"to\":\"now\"},\"useNormalizedEsInterval\":true,\"interval\":\"auto\",\"drop_partials\":false,\"min_doc_count\":1,\"extended_bounds\":{},\"customLabel\":\"Datetime\"}}]}"},"id":"tmpl-stock-price","migrationVersion":{"visualization":"7.3.1"},"references":[{"id":"tmpl-index","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2019-09-18T06:32:15.550Z","version":"WzY2NSwxMl0="} +{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[]}"},"optionsJSON":"{\"hidePanelTitles\":false,\"useMargins\":true}","panelsJSON":"[{\"embeddableConfig\":{},\"gridData\":{\"h\":14,\"i\":\"1\",\"w\":15,\"x\":21,\"y\":0},\"panelIndex\":\"1\",\"version\":\"7.3.1\",\"panelRefName\":\"panel_0\"},{\"embeddableConfig\":{},\"gridData\":{\"h\":20,\"i\":\"2\",\"w\":27,\"x\":21,\"y\":14},\"panelIndex\":\"2\",\"version\":\"7.3.1\",\"panelRefName\":\"panel_1\"},{\"embeddableConfig\":{},\"gridData\":{\"h\":14,\"i\":\"3\",\"w\":21,\"x\":0,\"y\":20},\"panelIndex\":\"3\",\"version\":\"7.3.1\",\"panelRefName\":\"panel_2\"},{\"embeddableConfig\":{},\"gridData\":{\"h\":14,\"i\":\"4\",\"w\":12,\"x\":36,\"y\":0},\"panelIndex\":\"4\",\"version\":\"7.3.1\",\"panelRefName\":\"panel_3\"},{\"embeddableConfig\":{},\"gridData\":{\"h\":20,\"i\":\"5\",\"w\":21,\"x\":0,\"y\":0},\"panelIndex\":\"5\",\"version\":\"7.3.1\",\"panelRefName\":\"panel_4\"}]","timeRes
tore":false,"title":"tmpl - Dashboard","version":1},"id":"tmpl-dashboard","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"tmpl-polarity-dashboard","name":"panel_0","type":"visualization"},{"id":"tmpl-article-list","name":"panel_1","type":"visualization"},{"id":"tmpl-article-imports","name":"panel_2","type":"visualization"},{"id":"tmpl-sentiment","name":"panel_3","type":"visualization"},{"id":"tmpl-stock-price","name":"panel_4","type":"visualization"}],"type":"dashboard","updated_at":"2019-09-18T06:29:35.661Z","version":"WzY1OSwxMl0="} \ No newline at end of file diff --git a/src/news.sentiment.py b/src/news.sentiment.py index a4d4462..d1408e8 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -20,7 +20,7 @@ from StockSight.EsMap.Sentiment import * -STOCKSIGHT_VERSION = '0.1-b.6' +STOCKSIGHT_VERSION = '0.2' __version__ = STOCKSIGHT_VERSION @@ -35,15 +35,15 @@ logger.info('NLTK tokens required: ' + str(config['symbols'][symbol])) logger.info('NLTK tokens ignored: ' + str(config['sentiment_analyzer']['ignore_words'])) - yahooListener = YahooFinanceListener(symbol) - yahooThread = threading.Thread(target=yahooListener.execute) - yahooThread.start() + yahoo_listener = YahooFinanceListener(symbol) + yahoo_thread = threading.Thread(target=yahoo_listener.execute) + yahoo_thread.start() - seekAlphaListener = SeekAlphaListener(symbol) - seekAlphaThread = threading.Thread(target=seekAlphaListener.execute) - seekAlphaThread.start() + seekalpha_listener = SeekAlphaListener(symbol) + seekalpha_thread = threading.Thread(target=seekalpha_listener.execute) + seekalpha_thread.start() - time.sleep(randint(5, 10)) + time.sleep(randint(config['spawn_intervals']['news_min'], config['spawn_intervals']['news_max'])) except Exception as e: logger.warning("%s" % e) pass diff --git a/src/stockprice.py b/src/stockprice.py index e404806..849f144 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -10,8 +10,7 @@ stocksight is released under the Apache 2.0 
license. See LICENSE for the full license text. """ -import argparse -import logging + import sys import threading import time @@ -23,7 +22,7 @@ from StockSight.StockPriceListener import StockPriceListener -STOCKSIGHT_VERSION = '0.1-b.5' +STOCKSIGHT_VERSION = '0.2' __version__ = STOCKSIGHT_VERSION @@ -38,10 +37,10 @@ stockprice = StockPriceListener() - priceThread = threading.Thread(target=stockprice.get_price,args=(symbol,)) - priceThread.start() + price_thread = threading.Thread(target=stockprice.get_price,args=(symbol,)) + price_thread.start() - time.sleep(randint(2,5)) + time.sleep(randint(config['spawn_intervals']['stockprice_min'], config['spawn_intervals']['stockprice_max'])) except Exception as e: logger.warning("%s" % e) diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py index 4be0297..11aee7f 100644 --- a/src/tweet.sentiment.py +++ b/src/tweet.sentiment.py @@ -20,7 +20,7 @@ from tweepy import API, Stream, OAuthHandler, TweepError -STOCKSIGHT_VERSION = '0.1-b.6' +STOCKSIGHT_VERSION = '0.2' __version__ = STOCKSIGHT_VERSION diff --git a/README.og.md b/src/v0.1_deprecated/README.og.md similarity index 100% rename from README.og.md rename to src/v0.1_deprecated/README.og.md diff --git a/src/Original/config.sample.py b/src/v0.1_deprecated/config.sample.py similarity index 100% rename from src/Original/config.sample.py rename to src/v0.1_deprecated/config.sample.py diff --git a/src/Original/sentiment.og.py b/src/v0.1_deprecated/sentiment.og.py similarity index 100% rename from src/Original/sentiment.og.py rename to src/v0.1_deprecated/sentiment.og.py diff --git a/src/Original/stockprice.og.py b/src/v0.1_deprecated/stockprice.og.py similarity index 100% rename from src/Original/stockprice.og.py rename to src/v0.1_deprecated/stockprice.og.py diff --git a/tests/StockSight/Helper/RequestRandomizerTest.py b/tests/StockSight/Helper/RequestRandomizerTest.py index 5d54d14..9831443 100644 --- a/tests/StockSight/Helper/RequestRandomizerTest.py +++ 
b/tests/StockSight/Helper/RequestRandomizerTest.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """ Request Randomizer Test +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 14f8092..6dd3ad6 100644 --- a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """ Seeking Alpha Listener Tests +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. @@ -63,7 +64,7 @@ def test_get_news_headlines_with_body(self): def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() - time.sleep(5) + time.sleep(3) logs = es.search(index=self.index_name,body="{}") message = logs['hits']['hits'][0]['_source'] self.assertIsNotNone(message['title'], "Title is empty") diff --git a/tests/StockSight/StockPriceListenerTest.py b/tests/StockSight/StockPriceListenerTest.py index 65f3cb1..c985e5b 100644 --- a/tests/StockSight/StockPriceListenerTest.py +++ b/tests/StockSight/StockPriceListenerTest.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """ Stock Price listener test +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. See LICENSE for the full license text. diff --git a/tests/StockSight/YahooFinanceListenerTest.py b/tests/StockSight/YahooFinanceListenerTest.py index 154d864..bc65d97 100644 --- a/tests/StockSight/YahooFinanceListenerTest.py +++ b/tests/StockSight/YahooFinanceListenerTest.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """Yahoo Finance News Test +Copyright (C) Chris Park 2018-2019 Copyright (C) Allen (Jian Feng) Xie 2019 stocksight is released under the Apache 2.0 license. 
See LICENSE for the full license text. @@ -57,7 +58,7 @@ def test_get_news_headlines_with_body(self): def test_execute(self): self.mainClass.index_name = self.index_name self.mainClass.execute() - time.sleep(5) + time.sleep(3) logs = es.search(index=self.index_name,body="{}") message = logs['hits']['hits'][0]['_source'] self.assertIsNotNone(message['title'], "Title is empty") From 097c7745ea427b5c37387f8bd6af1ef5b27dfcd9 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 22 Sep 2019 16:46:03 -0400 Subject: [PATCH 49/55] - Additonal Readme change --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1283bab..3462305 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Each user running stocksight has a unique fingerprint: specific stocks they are ### Upgrade From 0.1 Version 0.2 went through an architectural revamp. You will have to COPY the v0.1 data from Elastic 5.6 to Elastic 7.3 if you wish to retain your previous data. -The ElasticSearch index mappings are also different between two versions. Please see "src/StockSight/EsMap" files for details. +The ElasticSearch index mappings are also different between two versions. New version records additional data for sentiment and stock prices. Please see "src/StockSight/EsMap" files for details. Differences: 1. Each symbol have its own set of price and sentiment indexes. 
From efc7387f859416873f26abf2ebbcf1dcf0604cd7 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 22 Sep 2019 17:51:50 -0400 Subject: [PATCH 50/55] - Fix kibana tmp folder issue --- src/startup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/startup.sh b/src/startup.sh index 00da000..3f73a59 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -52,6 +52,7 @@ done #Copy kibana dashboards echo "Copy kibana dashboard if they don't exist"; +mkdir -p ./kibana_export/tmp python import.kibana.py & tick=0 From 175dd61f82d050894ab2640d088dd69174ef9c43 Mon Sep 17 00:00:00 2001 From: heyqule Date: Sun, 22 Sep 2019 18:09:36 -0400 Subject: [PATCH 51/55] - Minor change to spawn timers --- src/StockSight/NewsHeadlineListener.py | 4 +++- src/config.sample.yml | 4 ++-- src/news.sentiment.py | 3 ++- src/stockprice.py | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/StockSight/NewsHeadlineListener.py b/src/StockSight/NewsHeadlineListener.py index 7615e83..c84c419 100644 --- a/src/StockSight/NewsHeadlineListener.py +++ b/src/StockSight/NewsHeadlineListener.py @@ -138,7 +138,9 @@ def can_process(self, article): def get_soup(self, url): #try not to spam the server, but if you run with 100 stock symbols, it's probably going to spam it anyway lol. - time.sleep(randint(config['spawn_intervals']['request_min'], config['spawn_intervals']['request_max'])) + if(config['spawn_intervals']['request_min'] > 0): + time.sleep(randint(config['spawn_intervals']['request_min'], config['spawn_intervals']['request_max'])) + req = requests.get(url) html = req.text soup = BeautifulSoup(html, 'html.parser') diff --git a/src/config.sample.yml b/src/config.sample.yml index e80cfc0..bdd921a 100644 --- a/src/config.sample.yml +++ b/src/config.sample.yml @@ -54,8 +54,8 @@ stock_price: # but if you have tons of symbols, it's probably not going to matter much. 
spawn_intervals: #Stock price listener spawn timer - stockprice_min: 2 - stockprice_max: 5 + stockprice_min: 0 + stockprice_max: 0 #News Sentiment listen spawn timer news_min: 5 news_max: 10 diff --git a/src/news.sentiment.py b/src/news.sentiment.py index d1408e8..3d17c26 100644 --- a/src/news.sentiment.py +++ b/src/news.sentiment.py @@ -43,7 +43,8 @@ seekalpha_thread = threading.Thread(target=seekalpha_listener.execute) seekalpha_thread.start() - time.sleep(randint(config['spawn_intervals']['news_min'], config['spawn_intervals']['news_max'])) + if(config['spawn_intervals']['news_min'] > 0): + time.sleep(randint(config['spawn_intervals']['news_min'], config['spawn_intervals']['news_max'])) except Exception as e: logger.warning("%s" % e) pass diff --git a/src/stockprice.py b/src/stockprice.py index 849f144..80010bf 100644 --- a/src/stockprice.py +++ b/src/stockprice.py @@ -40,7 +40,8 @@ price_thread = threading.Thread(target=stockprice.get_price,args=(symbol,)) price_thread.start() - time.sleep(randint(config['spawn_intervals']['stockprice_min'], config['spawn_intervals']['stockprice_max'])) + if(config['spawn_intervals']['stockprice_min'] > 0): + time.sleep(randint(config['spawn_intervals']['stockprice_min'], config['spawn_intervals']['stockprice_max'])) except Exception as e: logger.warning("%s" % e) From a17873394f721566d3a842347c38dbcefee3715c Mon Sep 17 00:00:00 2001 From: heyqule Date: Mon, 23 Sep 2019 20:44:21 -0400 Subject: [PATCH 52/55] Minor Refactor --- src/StockSight/Helper/Sentiment.py | 4 ++-- src/StockSight/StockPriceListener.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/StockSight/Helper/Sentiment.py b/src/StockSight/Helper/Sentiment.py index 1d8cfd0..3c82098 100644 --- a/src/StockSight/Helper/Sentiment.py +++ b/src/StockSight/Helper/Sentiment.py @@ -63,8 +63,8 @@ def sentiment_analysis(text, online = False): could be made better :) :param text: """ - sentiment_url = None; - if online == True: + sentiment_url = None + if 
online: sentiment_url = 'http://text-processing.com/api/sentiment/' # pass text into sentiment url sentiment_url = get_sentiment_from_url(text, sentiment_url) diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index 6a5ac6b..4a3cb14 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -58,7 +58,7 @@ def get_price(self, symbol): raise # build dict to store stock info try: - D = {} + D = [] D['symbol'] = symbol D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] if D['last'] is None: From 6f380250c787b966101d028c25643da230374e3c Mon Sep 17 00:00:00 2001 From: heyqule Date: Fri, 11 Oct 2019 22:21:03 -0400 Subject: [PATCH 53/55] Fix issue found by shaggy63 Disable message body test for seeking alpha --- src/StockSight/StockPriceListener.py | 2 +- src/StockSight/TweetListener.py | 2 +- tests/StockSight/SeekAlphaListenerTest.py | 33 ++++++++++++----------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/StockSight/StockPriceListener.py b/src/StockSight/StockPriceListener.py index 4a3cb14..6a5ac6b 100644 --- a/src/StockSight/StockPriceListener.py +++ b/src/StockSight/StockPriceListener.py @@ -58,7 +58,7 @@ def get_price(self, symbol): raise # build dict to store stock info try: - D = [] + D = {} D['symbol'] = symbol D['last'] = data['chart']['result'][0]['indicators']['quote'][0]['close'][-1] if D['last'] is None: diff --git a/src/StockSight/TweetListener.py b/src/StockSight/TweetListener.py index 32efc3e..be44369 100644 --- a/src/StockSight/TweetListener.py +++ b/src/StockSight/TweetListener.py @@ -142,7 +142,7 @@ def on_data(self, data): es.index(index=self.index_name, doc_type="_doc", body={ - "_id": redis_id, + "msg_id": redis_id, "author": screen_name, "location": location, "date": created_date, diff --git a/tests/StockSight/SeekAlphaListenerTest.py b/tests/StockSight/SeekAlphaListenerTest.py index 6dd3ad6..6208093 100644 --- 
a/tests/StockSight/SeekAlphaListenerTest.py +++ b/tests/StockSight/SeekAlphaListenerTest.py @@ -43,22 +43,23 @@ def test_get_news_headlines(self): self.assertIsNotNone(headlines[0].url, "URL is empty") self.assertIsNotNone(headlines[0].referer_url, "Refer URL is empty") - #always fails b/c of 403 requests. - def test_get_news_headlines_with_body(self): - config['news']['follow_link'] = True - headlines = self.mainClass.get_news_headlines() - self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline / Page returned 403") - self.assertIsInstance(headlines[0], Article, "Is not an Article") - self.assertIsNotNone(headlines[0].title, "Title is empty") - self.assertIsNotNone(headlines[0].url, "URL is empty") - self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") - - empty_bodies = 0 - for message in headlines: - if message.body == '': - empty_bodies += 1 - - self.assertAlmostEqual(empty_bodies, 0, None, "There are %s empty bodies in %s headlines" % (empty_bodies, headlines.__len__()), 5) + # always fails b/c SA returns 403 response after certain threshold. 
+ # Usually returns appox 10 articles in a batch of 30 requests + # def test_get_news_headlines_with_body(self): + # config['news']['follow_link'] = True + # headlines = self.mainClass.get_news_headlines() + # self.assertGreaterEqual(headlines.__len__(), 1, "Empty Headline / Page returned 403") + # self.assertIsInstance(headlines[0], Article, "Is not an Article") + # self.assertIsNotNone(headlines[0].title, "Title is empty") + # self.assertIsNotNone(headlines[0].url, "URL is empty") + # self.assertNotEqual(headlines[0].referer_url, '', "Refer URL is empty") + # + # empty_bodies = 0 + # for message in headlines: + # if message.body == '': + # empty_bodies += 1 + # + # self.assertAlmostEqual(empty_bodies, 0, None, "There are %s empty bodies in %s headlines" % (empty_bodies, headlines.__len__()), 5) def test_execute(self): From 646b0d9ff35310f72eb32558bd948609bb78cca2 Mon Sep 17 00:00:00 2001 From: heyqule Date: Fri, 11 Oct 2019 22:42:57 -0400 Subject: [PATCH 54/55] Disable unnecessary exposed ports --- docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6f710b7..2d5959a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,15 +23,15 @@ services: soft: 4096 hard: 4096 #expose this port for local dev only! - ports: - - "9200:9200" + #ports: + # - "9200:9200" restart: unless-stopped redis: build: context: ./redis-docker #expose this port for local dev only! 
- ports: - - "6379:6379" + #ports: + # - "6379:6379" restart: unless-stopped kibana: image: docker.elastic.co/kibana/kibana:7.3.1 From b985410485d515027bf6735d1215476c2f604bfc Mon Sep 17 00:00:00 2001 From: heyqule Date: Wed, 16 Oct 2019 19:51:30 -0400 Subject: [PATCH 55/55] Add copyright blocks to non-py files --- docker-compose.yml | 5 +++++ python-docker/Dockerfile | 4 ++++ redis-docker/Dockerfile | 4 ++++ src/startup.sh | 1 + src/tweet.sentiment.py | 4 ++-- 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2d5959a..dab834d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,9 @@ # ./docker-compose.yml +# +#Copyright (C) Chris Park 2018-2019 +#Copyright (C) Allen (Jian Feng) Xie 2019 +#stocksight is released under the Apache 2.0 license. See +#LICENSE for the full license text. version: '3' services: diff --git a/python-docker/Dockerfile b/python-docker/Dockerfile index 42557c2..c4bc92c 100644 --- a/python-docker/Dockerfile +++ b/python-docker/Dockerfile @@ -1,3 +1,7 @@ +#Copyright (C) Chris Park 2018-2019 +#Copyright (C) Allen (Jian Feng) Xie 2019 +#stocksight is released under the Apache 2.0 license. See +#LICENSE for the full license text. FROM python:3-alpine LABEL maintainer="Allen (Jian Feng) Xie" diff --git a/redis-docker/Dockerfile b/redis-docker/Dockerfile index 8f890d7..5a21d3e 100644 --- a/redis-docker/Dockerfile +++ b/redis-docker/Dockerfile @@ -1,3 +1,7 @@ +#Copyright (C) Chris Park 2018-2019 +#Copyright (C) Allen (Jian Feng) Xie 2019 +#stocksight is released under the Apache 2.0 license. See +#LICENSE for the full license text. 
FROM redis:5-alpine LABEL maintainer="Allen (Jian Feng) Xie" COPY redis.conf /usr/local/etc/redis/redis.conf diff --git a/src/startup.sh b/src/startup.sh index 3f73a59..da2df04 100755 --- a/src/startup.sh +++ b/src/startup.sh @@ -1,5 +1,6 @@ #!/bin/bash +#Copyright (C) Chris Park 2018-2019 #Copyright (C) Allen (Jian Feng) Xie 2019 #stocksight is released under the Apache 2.0 license. See #LICENSE for the full license text. diff --git a/src/tweet.sentiment.py b/src/tweet.sentiment.py index 11aee7f..fbfb46c 100644 --- a/src/tweet.sentiment.py +++ b/src/tweet.sentiment.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""stockprice.py - get stock price from Yahoo and add to -Elasticsearch. +""" +tweet sentiment runner See README.md or https://github.com/shirosaidev/stocksight for more information.