Skip to content
This repository has been archived by the owner on Nov 18, 2018. It is now read-only.

Commit

Permalink
tut1 & tut2
Browse files Browse the repository at this point in the history
  • Loading branch information
Xiaochuang Han committed Oct 2, 2017
0 parents commit 8d2fd5c
Show file tree
Hide file tree
Showing 2 changed files with 241 additions and 0 deletions.
12 changes: 12 additions & 0 deletions mongodb_tut1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pymongo import MongoClient

db = MongoClient('mongodb://143.215.138.132:27017')['big_data']

sample = {'$sample': {'size': 3}}

pipeline = [sample]

query_result = db.tweet.aggregate(pipeline)

for tweet in query_result:
print tweet['text']
229 changes: 229 additions & 0 deletions mongodb_tut2.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## Timestamp Range"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from pymongo import MongoClient\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"db = MongoClient('mongodb://143.215.138.132:27017')['big_data']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# Sample Tweet:\n",
"# { \"_id\" : ObjectId(\"5819377b8548676a7239588c\"),\n",
"# \"guid\" : \"793613871756013568\",\n",
"# \"text\" : \"#incrediblessuperdanceparty can you play all of the Spanish songs?!\",\n",
"# \"author_id\" : \"770418732665565185\",\n",
"# \"author_name\" : \"Kayle Figueiredo\",\n",
"# \"author_followers_count\" : 15,\n",
"# \"timestamp\" : 1478047266,\n",
"# \"lon\" : -83.804475,\n",
"# \"lat\" : 27.698681999999998,\n",
"# \"words\" : [ \"can\", \"you\", \"play\", \"all\", \"of\", \"the\", \"spanish\", \"songs\" ],\n",
"# \"keywords\" : [ \"play\", \"songs\", \"spanish\" ],\n",
"# \"hashtags\" : [ \"incrediblessuperdanceparty\" ],\n",
"# \"mentions_id\" : [ ],\n",
"# \"mentions_name\" : [ ],\n",
"# \"urls\" : [ ],\n",
"# \"favorite_count\" : 0,\n",
"# \"retweet_count\" : 0 }"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def find_first_timestamp():\n",
" sort = {'$sort': {'timestamp': 1}}\n",
" limit = {'$limit': 1}\n",
" pipeline = [sort, limit]\n",
" return list(db.tweet_subset.aggregate(pipeline))[0]['timestamp']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def find_last_timestamp():\n",
" sort = {'$sort': {'timestamp': -1}}\n",
" limit = {'$limit': 1}\n",
" pipeline = [sort, limit]\n",
" return list(db.tweet_subset.aggregate(pipeline))[0]['timestamp']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def get_timestamp_list():\n",
" timestamp_list = []\n",
" for tweet in list(db.tweet_subset.find()):\n",
" timestamp_list.append(tweet['timestamp'])\n",
" return timestamp_list"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"%time print find_first_timestamp()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"%time print find_last_timestamp()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"%time print min(get_timestamp_list())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"%time print max(get_timestamp_list())"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## Histogram"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"timestamp_list = get_timestamp_list()\n",
"\n",
"bins = np.linspace(find_first_timestamp(), find_last_timestamp(), 100)\n",
"\n",
"plt.hist(timestamp_list, bins=bins)\n",
"\n",
"plt.title('Tweet Timeline')\n",
"plt.xlabel('Timestamp')\n",
"plt.ylabel('Number of Tweets')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 8d2fd5c

Please sign in to comment.