Skip to content

Commit

Permalink
deploy: 4d8c521
Browse files Browse the repository at this point in the history
  • Loading branch information
yxdyc committed Aug 2, 2024
1 parent 678d665 commit 940b941
Show file tree
Hide file tree
Showing 15 changed files with 592 additions and 77 deletions.
34 changes: 19 additions & 15 deletions _modules/data_juicer/ops/filter/video_aesthetics_filter.html
Original file line number Diff line number Diff line change
Expand Up @@ -239,23 +239,27 @@ <h1>Source code for data_juicer.ops.filter.video_aesthetics_filter</h1><div clas
<span class="n">sample</span><span class="p">[</span><span class="n">Fields</span><span class="o">.</span><span class="n">context</span><span class="p">][</span><span class="n">sampled_frames_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">frames</span>
<span class="n">frame_images</span> <span class="o">=</span> <span class="p">[</span><span class="n">frame</span><span class="o">.</span><span class="n">to_image</span><span class="p">()</span> <span class="k">for</span> <span class="n">frame</span> <span class="ow">in</span> <span class="n">frames</span><span class="p">]</span>

<span class="c1"># compute aesthetics_scores</span>
<span class="n">model</span><span class="p">,</span> <span class="n">processor</span> <span class="o">=</span> <span class="n">get_model</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model_key</span><span class="p">,</span> <span class="n">rank</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">use_cuda</span><span class="p">())</span>
<span class="n">inputs</span> <span class="o">=</span> <span class="n">processor</span><span class="p">(</span><span class="n">images</span><span class="o">=</span><span class="n">frame_images</span><span class="p">,</span>
<span class="n">return_tensors</span><span class="o">=</span><span class="s1">&#39;pt&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="n">outputs</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="o">**</span><span class="n">inputs</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">need_normalized_by_ten</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits</span> <span class="o">/</span> <span class="mf">10.0</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">frame_images</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="c1"># compute aesthetics_scores</span>
<span class="n">model</span><span class="p">,</span> <span class="n">processor</span> <span class="o">=</span> <span class="n">get_model</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model_key</span><span class="p">,</span> <span class="n">rank</span><span class="o">=</span><span class="n">rank</span><span class="p">)</span>
<span class="n">inputs</span> <span class="o">=</span> <span class="n">processor</span><span class="p">(</span><span class="n">images</span><span class="o">=</span><span class="n">frame_images</span><span class="p">,</span>
<span class="n">return_tensors</span><span class="o">=</span><span class="s1">&#39;pt&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="n">outputs</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="o">**</span><span class="n">inputs</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">need_normalized_by_ten</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits</span> <span class="o">/</span> <span class="mf">10.0</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits</span>

<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;avg&#39;</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">aesthetics_score</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;max&#39;</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">aesthetics_score</span><span class="o">.</span><span class="n">max</span><span class="p">())</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;avg&#39;</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">aesthetics_score</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;max&#39;</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">aesthetics_score</span><span class="o">.</span><span class="n">max</span><span class="p">())</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">aesthetics_score</span><span class="o">.</span><span class="n">min</span><span class="p">())</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">aesthetics_score</span><span class="o">.</span><span class="n">min</span><span class="p">())</span>
<span class="n">aesthetics_score</span> <span class="o">=</span> <span class="mf">0.0</span>

<span class="n">aesthetics_scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">aesthetics_score</span><span class="p">)</span>

<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;aesthetics_score: </span><span class="si">{</span><span class="n">aesthetics_scores</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
Expand Down
6 changes: 3 additions & 3 deletions _modules/data_juicer/ops/filter/video_duration_filter.html
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ <h1>Source code for data_juicer.ops.filter.video_duration_filter</h1><div class=
<span></span><span class="kn">import</span> <span class="nn">sys</span>

<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">jsonargparse.typing</span> <span class="kn">import</span> <span class="n">NonNegativeInt</span>
<span class="kn">from</span> <span class="nn">jsonargparse.typing</span> <span class="kn">import</span> <span class="n">NonNegativeFloat</span>

<span class="kn">from</span> <span class="nn">data_juicer.utils.constant</span> <span class="kn">import</span> <span class="n">Fields</span><span class="p">,</span> <span class="n">StatsKeys</span>
<span class="kn">from</span> <span class="nn">data_juicer.utils.mm_utils</span> <span class="kn">import</span> <span class="p">(</span><span class="n">close_video</span><span class="p">,</span> <span class="n">load_data_with_context</span><span class="p">,</span>
Expand All @@ -104,8 +104,8 @@ <h1>Source code for data_juicer.ops.filter.video_duration_filter</h1><div class=
<span class="sd"> &quot;&quot;&quot;</span>

<div class="viewcode-block" id="VideoDurationFilter.__init__"><a class="viewcode-back" href="../../../../data_juicer.ops.filter.html#data_juicer.ops.filter.VideoDurationFilter.__init__">[docs]</a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">min_duration</span><span class="p">:</span> <span class="n">NonNegativeInt</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">max_duration</span><span class="p">:</span> <span class="n">NonNegativeInt</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">maxsize</span><span class="p">,</span>
<span class="n">min_duration</span><span class="p">:</span> <span class="n">NonNegativeFloat</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">max_duration</span><span class="p">:</span> <span class="n">NonNegativeFloat</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">maxsize</span><span class="p">,</span>
<span class="n">any_or_all</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;any&#39;</span><span class="p">,</span>
<span class="o">*</span><span class="n">args</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,23 +255,26 @@ <h1>Source code for data_juicer.ops.filter.video_frames_text_similarity_filter</
<span class="n">image</span> <span class="o">=</span> <span class="n">ImageOps</span><span class="o">.</span><span class="n">flip</span><span class="p">(</span><span class="n">image</span><span class="p">)</span>
<span class="n">video_frame_images_chunk</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">image</span><span class="p">)</span>

<span class="n">inputs</span> <span class="o">=</span> <span class="n">processor</span><span class="p">(</span><span class="n">text</span><span class="o">=</span><span class="n">text_chunk</span><span class="p">,</span>
<span class="n">images</span><span class="o">=</span><span class="n">video_frame_images_chunk</span><span class="p">,</span>
<span class="n">return_tensors</span><span class="o">=</span><span class="s1">&#39;pt&#39;</span><span class="p">,</span>
<span class="n">truncation</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">max_length</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">text_config</span><span class="o">.</span>
<span class="n">max_position_embeddings</span><span class="p">,</span>
<span class="n">padding</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>

<span class="n">outputs</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="o">**</span><span class="n">inputs</span><span class="p">)</span>
<span class="n">chunk_logits</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits_per_text</span> <span class="o">/</span> <span class="mf">100.0</span>

<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;avg&#39;</span><span class="p">:</span>
<span class="n">chunk_similarity</span> <span class="o">=</span> <span class="n">chunk_logits</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;max&#39;</span><span class="p">:</span>
<span class="n">chunk_similarity</span> <span class="o">=</span> <span class="n">chunk_logits</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">video_frame_images_chunk</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">inputs</span> <span class="o">=</span> <span class="n">processor</span><span class="p">(</span><span class="n">text</span><span class="o">=</span><span class="n">text_chunk</span><span class="p">,</span>
<span class="n">images</span><span class="o">=</span><span class="n">video_frame_images_chunk</span><span class="p">,</span>
<span class="n">return_tensors</span><span class="o">=</span><span class="s1">&#39;pt&#39;</span><span class="p">,</span>
<span class="n">truncation</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="n">max_length</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">text_config</span><span class="o">.</span>
<span class="n">max_position_embeddings</span><span class="p">,</span>
<span class="n">padding</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>

<span class="n">outputs</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="o">**</span><span class="n">inputs</span><span class="p">)</span>
<span class="n">chunk_logits</span> <span class="o">=</span> <span class="n">outputs</span><span class="o">.</span><span class="n">logits_per_text</span> <span class="o">/</span> <span class="mf">100.0</span>

<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;avg&#39;</span><span class="p">:</span>
<span class="n">chunk_similarity</span> <span class="o">=</span> <span class="n">chunk_logits</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">reduce_mode</span> <span class="o">==</span> <span class="s1">&#39;max&#39;</span><span class="p">:</span>
<span class="n">chunk_similarity</span> <span class="o">=</span> <span class="n">chunk_logits</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">chunk_similarity</span> <span class="o">=</span> <span class="n">chunk_logits</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">chunk_similarity</span> <span class="o">=</span> <span class="n">chunk_logits</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="n">chunk_similarity</span> <span class="o">=</span> <span class="mf">0.0</span>

<span class="n">similarity</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="n">chunk_similarity</span><span class="p">))</span>
<span class="n">offset</span> <span class="o">+=</span> <span class="n">count</span>
Expand Down
Loading

0 comments on commit 940b941

Please sign in to comment.