diff --git a/docs/_build/doctrees/advertools.crawlytics.doctree b/docs/_build/doctrees/advertools.crawlytics.doctree
index 425bf907..54d8bd7c 100644
Binary files a/docs/_build/doctrees/advertools.crawlytics.doctree and b/docs/_build/doctrees/advertools.crawlytics.doctree differ
diff --git a/docs/_build/doctrees/environment.pickle b/docs/_build/doctrees/environment.pickle
index 6011344f..56919d31 100644
Binary files a/docs/_build/doctrees/environment.pickle and b/docs/_build/doctrees/environment.pickle differ
diff --git a/docs/_build/html/_modules/advertools/crawlytics.html b/docs/_build/html/_modules/advertools/crawlytics.html
index 507dab2a..d9b0fa1a 100644
--- a/docs/_build/html/_modules/advertools/crawlytics.html
+++ b/docs/_build/html/_modules/advertools/crawlytics.html
@@ -154,7 +154,7 @@
>>> import advertools as adv
>>> import pandas as pd
->>> crawldf = pd.read_json('path/to/output_file.jl', lines=True)
+>>> crawldf = pd.read_json("path/to/output_file.jl", lines=True)
>>> img_df = adv.crawlytics.images(crawldf)
>>> img_df
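From here, ordinary pandas operations take over. A hypothetical follow-up,
assuming the crawl captured the ``img_alt`` attribute so ``img_df`` has an
``img_alt`` column:

>>> missing_alt = img_df[img_df["img_alt"].isna()]  # images without alt text
>>> missing_alt["url"].nunique()  # number of affected pages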
@@ -211,7 +211,7 @@ Source code for advertools.crawlytics
The ``crawlytics.links`` function gives you a summary of the links that is similar to
the format of the ``crawlytics.images`` DataFrame.
->>> link_df = adv.crawlytics.links(crawldf, internal_url_regex='nytimes.com')
+>>> link_df = adv.crawlytics.links(crawldf, internal_url_regex="nytimes.com")
>>> link_df
==== =========================================================== ======================================================================== ================== ========== ==========
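Because the DataFrame flags each link as internal or external, a quick split is
one expression away (a sketch, assuming the boolean ``internal`` column shown in
the full table):

>>> link_df["internal"].value_counts()  # internal vs. external link counts
>>> external_links = link_df[~link_df["internal"]]  # external links only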
@@ -301,9 +301,10 @@ Source code for advertools.crawlytics
columns of interest, write them to a new file, and delete the old large crawl file.
>>> crawl_subset = adv.crawlytics.jl_subset(
-... filepath='/path/to/output_file.jl',
-... columns=[col1, col2, ...],
-... regex=column_regex)
+... filepath="/path/to/output_file.jl",
+... columns=[col1, col2, ...],
+... regex=column_regex,
+... )
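To complete that workflow, write the subset to a new file and delete the old
one (a minimal sketch; the paths and column names are placeholders, and it
assumes ``jl_subset`` returns a regular pandas DataFrame):

>>> import os
>>> crawl_subset = adv.crawlytics.jl_subset(
...     filepath="/path/to/output_file.jl",
...     columns=["url", "title", "status"],
... )
>>> crawl_subset.to_json("/path/to/output_file_slim.jl", orient="records", lines=True)
>>> os.remove("/path/to/output_file.jl")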
You can use the ``columns`` parameter to specify exactly which columns you want. You can
also use a regular expression to specify a set of columns. Here are some examples of
@@ -346,7 +347,7 @@ Source code for advertools.crawlytics
One of the main advantages of using parquet is that you can select which columns you want
to read.
->>> adv.crawlytics.parquet_columns('output_file.parquet') # first 15 columns only
+>>> adv.crawlytics.parquet_columns("output_file.parquet") # first 15 columns only
==== ============== ======
.. column type
@@ -370,7 +371,7 @@ Source code for advertools.crawlytics
Check how many columns we have of each type.
->>> adv.crawlytics.parquet_columns('nyt_crawl.parquet')['type'].value_counts()
+>>> adv.crawlytics.parquet_columns("nyt_crawl.parquet")["type"].value_counts()
==== ========================================================================================================================================================= =======
.. type count
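Knowing the available columns, you can load only what you need with pandas (a
sketch, assuming a parquet engine such as pyarrow is installed; the file and
column names are placeholders):

>>> cols = adv.crawlytics.parquet_columns("nyt_crawl.parquet")
>>> jsonld_cols = cols[cols["column"].str.contains("jsonld")]["column"].tolist()
>>> crawldf = pd.read_parquet("nyt_crawl.parquet", columns=["url", "title"] + jsonld_cols)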
@@ -410,6 +411,7 @@ Source code for advertools.crawlytics
"jl_to_parquet",
"parquet_columns",
"compare",
+ "running_crawls",
]
@@ -433,7 +435,7 @@ Source code for advertools.crawlytics
--------
>>> import advertools as adv
>>> import pandas as pd
- >>> crawldf = pd.read_json('output_file.jl', lines=True)
+ >>> crawldf = pd.read_json("output_file.jl", lines=True)
>>> redirect_df = adv.crawlytics.redirects(crawldf)
>>> redirect_df
@@ -516,7 +518,7 @@ Source code for advertools.crawlytics
--------
>>> import advertools as adv
>>> import pandas as pd
- >>> crawldf = pd.read_json('output_file.jl', lines=True)
+ >>> crawldf = pd.read_json("output_file.jl", lines=True)
>>> link_df = adv.crawlytics.links(crawldf)
>>> link_df
@@ -581,7 +583,7 @@ Source code for advertools.crawlytics
--------
>>> import advertools as adv
>>> import pandas as pd
- >>> crawldf = pd.read_json('output_file.jl', lines=True)
+ >>> crawldf = pd.read_json("output_file.jl", lines=True)
>>> image_df = adv.crawlytics.images(crawldf)
>>> image_df
@@ -643,15 +645,17 @@ Source code for advertools.crawlytics
Read only the columns "url" and "meta_desc":
- >>> adv.crawlytics.jl_subset('output_file.jl', columns=['url', 'meta_desc'])
+ >>> adv.crawlytics.jl_subset("output_file.jl", columns=["url", "meta_desc"])
Read columns matching the regex "jsonld":
- >>> adv.crawlytics.jl_subset('output_file.jl', regex='jsonld')
+ >>> adv.crawlytics.jl_subset("output_file.jl", regex="jsonld")
Read the columns "url" and "meta_desc" as well as columns matching "jsonld":
- >>> adv.crawlytics.jl_subset('output_file.jl', columns=['url', 'meta_desc'], regex='jsonld')
+ >>> adv.crawlytics.jl_subset(
+ ... "output_file.jl", columns=["url", "meta_desc"], regex="jsonld"
+ ... )
Returns
-------
@@ -766,9 +770,9 @@ Source code for advertools.crawlytics
>>> import advertools as adv
>>> import pandas as pd
- >>> df1 = pd.read_json('output_file1.jl', lines=True)
- >>> df2 = pd.read_json('output_file2.jl', lines=True)
- >>> adv.crawlytics.compare(df1, df1, 'size')
+ >>> df1 = pd.read_json("output_file1.jl", lines=True)
+ >>> df2 = pd.read_json("output_file2.jl", lines=True)
+ >>> adv.crawlytics.compare(df1, df2, "size")
==== ========================== ======== ======== ====== ===========
.. url size_x size_y diff diff_perc
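A natural follow-up is to rank pages by how much they changed between the two
crawls (hypothetical, assuming the comparison is assigned to a variable):

>>> comparison = adv.crawlytics.compare(df1, df2, "size")
>>> comparison.sort_values("diff", key=abs, ascending=False).head()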
@@ -806,6 +810,8 @@ Source code for advertools.crawlytics
+
+[docs]
def running_crawls():
"""Get details of currently running spiders.
@@ -816,11 +822,47 @@ Source code for advertools.crawlytics
* elapsed: The elapsed time since the spider started.
* %mem: The percentage of memory that this spider is consuming.
* %cpu: The percentage of CPU that this spider is consuming.
- * args: The full command that was used to start this spider. Use this to identify
+ * command: The command that was used to start this spider. Use this to identify
the spider(s) that you want to know about.
* output_file: The path to the output file for each running crawl job.
* crawled_urls: The current number of lines in ``output_file``.
+
+ Examples
+ --------
+ While a crawl is running:
+
+ >>> import advertools as adv
+ >>> adv.crawlytics.running_crawls()
+
+ ==== ====== ========= ========= ====== ====== ========================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+ .. pid started elapsed %mem %cpu command output_file crawled_urls
+ ==== ====== ========= ========= ====== ====== ========================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+ 0 195720 21:41:14 00:11 1.1 103 /opt/tljh/user/bin/python /opt/tljh/user/bin/scrapy runspider /opt/tljh/user/lib/python3.10/site-packages/advertools/spider.py -a url_list=https://cnn.com -a allowed_domains=cnn.com -a follow_links=True -a exclude_url_params=None -a include_url_params=None -a exclude_url_regex=None -a include_url_regex=None -a css_selectors=None -a xpath_selectors=None -o cnn.jl -s CLOSESPIDER_PAGECOUNT=200 cnn.jl 30
+ ==== ====== ========= ========= ====== ====== ========================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+
+ After a few moments:
+
+ >>> adv.crawlytics.running_crawls()
+
+ ==== ====== ========= ========= ====== ====== ========================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+ .. pid started elapsed %mem %cpu command output_file crawled_urls
+ ==== ====== ========= ========= ====== ====== ========================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+ 0 195720 21:41:14 00:27 1.2 96.7 /opt/tljh/user/bin/python /opt/tljh/user/bin/scrapy runspider /opt/tljh/user/lib/python3.10/site-packages/advertools/spider.py -a url_list=https://cnn.com -a allowed_domains=cnn.com -a follow_links=True -a exclude_url_params=None -a include_url_params=None -a exclude_url_regex=None -a include_url_regex=None -a css_selectors=None -a xpath_selectors=None -o cnn.jl -s CLOSESPIDER_PAGECOUNT=200 cnn.jl 72
+ ==== ====== ========= ========= ====== ====== ========================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+
+ After starting a new crawl:
+
+ >>> adv.crawlytics.running_crawls()
+
+ ==== ====== ========= ========= ====== ====== ================================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+ .. pid started elapsed %mem %cpu command output_file crawled_urls
+ ==== ====== ========= ========= ====== ====== ================================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
+ 0 195720 21:41:14 01:02 1.6 95.7 /opt/tljh/user/bin/python /opt/tljh/user/bin/scrapy runspider /opt/tljh/user/lib/python3.10/site-packages/advertools/spider.py -a url_list=https://cnn.com -a allowed_domains=cnn.com -a follow_links=True -a exclude_url_params=None -a include_url_params=None -a exclude_url_regex=None -a include_url_regex=None -a css_selectors=None -a xpath_selectors=None -o cnn.jl -s CLOSESPIDER_PAGECOUNT=200 cnn.jl 154
+ 1 195769 21:42:09 00:07 0.4 83.8 /opt/tljh/user/bin/python /opt/tljh/user/bin/scrapy runspider /opt/tljh/user/lib/python3.10/site-packages/advertools/spider.py -a url_list=https://nytimes.com -a allowed_domains=nytimes.com -a follow_links=True -a exclude_url_params=None -a include_url_params=None -a exclude_url_regex=None -a include_url_regex=None -a css_selectors=None -a xpath_selectors=None -o nyt.jl -s CLOSESPIDER_PAGECOUNT=200 nyt.jl 17
+ ==== ====== ========= ========= ====== ====== ================================================================================================================================================================================================================================================================================================================================================================================================================= ============= ==============
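+
+ To track down a specific job, filter the ``command`` column (a hypothetical
+ follow-up, assuming the nytimes.com crawl above is still running):
+
+ >>> running = adv.crawlytics.running_crawls()
+ >>> running[running["command"].str.contains("nytimes.com", regex=False)]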
"""
+ if platform.system() == "Windows":
+     return "This function does not support Windows yet. It will soon. Sorry!"
ps = run(["ps", "xo", "pid,start,etime,%mem,%cpu,args"])
ps_stdout = ps.stdout.splitlines()
df = pd.DataFrame(
@@ -842,7 +884,8 @@ Source code for advertools.crawlytics
crawl_urls = crawl_urls[: min(len(crawl_urls), len(df_subset))]
df_subset["crawled_urls"] = crawl_urls
df_subset.columns = df_subset.columns.str.lower()
- return df_subset.rename(columns={"args": "command"})
+ return df_subset.rename(columns={"args": "command"})
+
diff --git a/docs/_build/html/advertools.crawlytics.html b/docs/_build/html/advertools.crawlytics.html
index 02d00e67..d0da7a71 100644
--- a/docs/_build/html/advertools.crawlytics.html
+++ b/docs/_build/html/advertools.crawlytics.html
@@ -156,7 +156,7 @@ Analyzing crawled images
>>> import advertools as adv
>>> import pandas as pd
->>> crawldf = pd.read_json('path/to/output_file.jl', lines=True)
+>>> crawldf = pd.read_json("path/to/output_file.jl", lines=True)
>>> img_df = adv.crawlytics.images(crawldf)
>>> img_df
@@ -333,7 +333,7 @@ Analyzing links in a crawled website
The crawlytics.links function gives you a summary of the links that is similar to
the format of the crawlytics.images
DataFrame.
->>> link_df = adv.crawlytics.links(crawldf, internal_url_regex='nytimes.com')
+>>> link_df = adv.crawlytics.links(crawldf, internal_url_regex="nytimes.com")
>>> link_df
@@ -610,9 +610,10 @@ Handling very large crawl files
>>> crawl_subset = adv.crawlytics.jl_subset(
-... filepath='/path/to/output_file.jl',
-... columns=[col1, col2, ...],
-... regex=column_regex)
+... filepath="/path/to/output_file.jl",
+... columns=[col1, col2, ...],
+... regex=column_regex,
+... )
You can use the columns
parameter to specify exactly which columns you want. You can
@@ -651,7 +652,7 @@ Exploring the columns and data types of parquet files
Another simple function gives us a DataFrame of the available columns in a parquet file.
One of the main advantages of using parquet is that you can select which columns you want
to read.
->>> adv.crawlytics.parquet_columns('output_file.parquet') # first 15 columns only
+>>> adv.crawlytics.parquet_columns("output_file.parquet") # first 15 columns only
@@ -725,7 +726,7 @@ Exploring the columns and data types of parquet files
Check how many columns we have of each type.
->>> adv.crawlytics.parquet_columns('nyt_crawl.parquet')['type'].value_counts()
+>>> adv.crawlytics.parquet_columns("nyt_crawl.parquet")["type"].value_counts()