Add arXiv

FF93 · Aug 6, 2024 · fb36470 · fb36470
1 parent ce6c0b0
commit fb36470
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 18 deletions.
diff --git a/_bibliography/papers.bib b/_bibliography/papers.bib
@@ -24,7 +24,7 @@ @inproceedings{herrmannlearning
   slides={learning_rnn_slides.pdf},
   preview={learning_rnn_pic.png},
   selected={true},
-  arxiv={https://arxiv.org/abs/2403.11998}
+  arxiv={2403.11998}
 }
 
 @inproceedings{zhuge2024language,
@@ -39,7 +39,8 @@ @inproceedings{zhuge2024language
   poster={gptswarm-poster.pdf},
   slides={GPTSwarm_slides.pdf},
   preview={gptswarm_pic.png},
-  selected={true}
+  selected={true},
+  arxiv={2402.16823}
 }
 
 @article{wang2024scaling,
@@ -50,7 +51,8 @@ @article{wang2024scaling
   abbr={Preprint},
   abstract={The Value Iteration Network (VIN) is an end-to-end differentiable architecture that performs value iteration on a latent MDP for planning in reinforcement learning (RL). However, VINs struggle to scale to long-term and large-scale planning tasks, such as navigating a 100 x 100 maze -- a task which typically requires thousands of planning steps to solve. We observe that this deficiency is due to two issues: the representation capacity of the latent MDP and the planning module's depth. We address these by augmenting the latent MDP with a dynamic transition kernel, dramatically improving its representational capacity, and, to mitigate the vanishing gradient problem, introducing an "adaptive highway loss" that constructs skip connections to improve gradient flow. We evaluate our method on both 2D maze navigation environments and the ViZDoom 3D navigation benchmark. We find that our new method, named Dynamic Transition VIN (DT-VIN), easily scales to 5000 layers and casually solves challenging versions of the above tasks. Altogether, we believe that DT-VIN represents a concrete step forward in performing long-term large-scale planning in RL environments.},
   bibtex_show={true},
-  preview={scaling5000_pic.png}
+  preview={scaling5000_pic.png},
+  arxiv={2406.08404}
 }
 
 @article{alhakami2024towards,
@@ -61,7 +63,8 @@ @article{alhakami2024towards
   abbr={Preprint},
   abstract={Artificial intelligence has made great strides in many areas lately, yet it has had comparatively little success in general-use robotics. We believe one of the reasons for this is the disconnect between traditional robotic design and the properties needed for open-ended, creativity-based AI systems. To that end, we, taking selective inspiration from nature, build a robust, partially soft robotic limb with a large action space, rich sensory data stream from multiple cameras, and the ability to connect with others to enhance the action space and data stream. As a proof of concept, we train two contemporary machine learning algorithms to perform a simple target-finding task. Altogether, we believe that this design serves as a first step to building a robot tailor-made for achieving artificial general intelligence.},
   bibtex_show={true},
-  preview={robot_pic.png}
+  preview={robot_pic.png},
+  arxiv={2404.08093}
 }
 
 @article{liu2024faster,
@@ -73,7 +76,8 @@ @article{liu2024faster
   abstract={We explore the role of attention mechanism during inference in text-conditional diffusion models. Empirical observations suggest that cross-attention outputs converge to a fixed point after several inference steps. The convergence time naturally divides the entire inference process into two phases: an initial phase for planning text-oriented visual semantics, which are then translated into images in a subsequent fidelity-improving phase. Cross-attention is essential in the initial phase but almost irrelevant thereafter. However, self-attention initially plays a minor role but becomes crucial in the second phase. These findings yield a simple and training-free method known as temporally gating the attention (TGATE), which efficiently generates images by caching and reusing attention outputs at scheduled time steps. Experimental results show when widely applied to various existing text-conditional diffusion models, TGATE accelerates these models by 10%-50%.},
   code={https://github.com/HaozheLiu-ST/T-GATE},
   bibtex_show={true},
-  preview={tgate_pic.png}
+  preview={tgate_pic.png},
+  arxiv={2404.02747}
 }
 
 @inproceedings{wanghighway,
@@ -86,7 +90,8 @@ @inproceedings{wanghighway
   code={https://github.com/wangyuhuix/HighwayVIN},
   bibtex_show={true},
   poster={poster_highway_vin.pdf},
-  preview={highway_vin_pic.png}
+  preview={highway_vin_pic.png},
+  arxiv={2406.03485}
 }
 
 @article{wang2024highway,
@@ -97,7 +102,8 @@ @article{wang2024highway
   abbr={Preprint},
   abstract={Learning from multi-step off-policy data collected by a set of policies is a core problem of reinforcement learning (RL). Approaches based on importance sampling (IS) often suffer from large variances due to products of IS ratios. Typical IS-free methods, such as n-step Q-learning, look ahead for n time steps along the trajectory of actions (where n is called the lookahead depth) and utilize off-policy data directly without any additional adjustment. They work well for proper choices of n. We show, however, that such IS-free methods underestimate the optimal value function (VF), especially for large n, restricting their capacity to efficiently utilize information from distant future time steps. To overcome this problem, we introduce a novel, IS-free, multi-step off-policy method that avoids the underestimation issue and converges to the optimal VF. At its core lies a simple but non-trivial highway gate, which controls the information flow from the distant future by comparing it to a threshold. The highway gate guarantees convergence to the optimal VF for arbitrary n and arbitrary behavioral policies. It gives rise to a novel family of off-policy RL algorithms that safely learn even when n is very large, facilitating rapid credit assignment from the far future to the past. On tasks with greatly delayed rewards, including video games where the reward is given only at the end of the game, our new methods outperform many existing multi-step off-policy algorithms.},
   bibtex_show={true},
-  preview={highway_pic.png}
+  preview={highway_pic.png},
+  arxiv={2405.18289}
 }
 
 #2023
@@ -117,7 +123,8 @@ @inproceedings{faccio2023goal
   poster={gogepo_poster.pdf},
   preview={gogepo_pic.png},
   slides={gogepo_slides.pdf},
-  selected={true}
+  selected={true},
+  arxiv={2207.01570}
 }
 
 @article{zhuge2023mindstorms,
@@ -132,7 +139,8 @@ @article{zhuge2023mindstorms
   poster={nlsom_poster.pdf},
   slides={nlsom_slides.pdf},
   preview={nlsom_pic.png},
-  selected={true}
+  selected={true},
+  arxiv={2305.17066}
 }
 
 
@@ -147,7 +155,8 @@ @inproceedings{liu2023learning
   code={https://github.com/AI-Initiative-KAUST/VideoRLCS},
   bibtex_show={true},
   poster={},
-  preview={critical_states_pic.png}
+  preview={critical_states_pic.png},
+  arxiv={2308.07795}
 }
 
 @article{stanic2023languini,
@@ -159,7 +168,8 @@ @article{stanic2023languini
   abstract={The Languini Kitchen serves as both a research collective and codebase designed to empower researchers with limited computational resources to contribute meaningfully to the field of language modelling. We introduce an experimental protocol that enables model comparisons based on equivalent compute, measured in accelerator hours. The number of tokens on which a model is trained is defined by the model's throughput and the chosen compute class. Notably, this approach avoids constraints on critical hyperparameters which affect total parameters or floating-point operations. For evaluation, we pre-process an existing large, diverse, and high-quality dataset of books that surpasses existing academic benchmarks in quality, diversity, and document length. On it, we compare methods based on their empirical scaling trends which are estimated through experiments at various levels of compute. This work also provides two baseline models: a feed-forward model derived from the GPT-2 architecture and a recurrent model in the form of a novel LSTM with ten-fold throughput. While the GPT baseline achieves better perplexity throughout all our levels of compute, our LSTM baseline exhibits a predictable and more favourable scaling law. This is due to the improved throughput and the need for fewer training tokens to achieve the same decrease in test perplexity. Extrapolating the scaling laws leads of both models results in an intersection at roughly 50,000 accelerator hours. We hope this work can serve as the foundation for meaningful and reproducible language modelling research.},
   code={https://github.com/languini-kitchen/languini-kitchen},
   bibtex_show={true},
-  preview={languini_pic.png}
+  preview={languini_pic.png},
+  arxiv={2309.11197}
 }
 
 #2022
@@ -177,7 +187,8 @@ @article{irie2022neural
   bibtex_show={true},
   poster={poster_neural_ode.pdf},
   slides={slides_neural_ode.pdf},
-  preview={neural_fwp_pic.png}
+  preview={neural_fwp_pic.png},
+  arxiv={2206.01649}
 }
 
 @inproceedings{vstrupl2022reward,
@@ -193,7 +204,8 @@ @inproceedings{vstrupl2022reward
   code={https://github.com/dylanashley/reward-weighted-regression},
   bibtex_show={true},
   poster={rwr_poster.pdf},
-  preview={rwr_pic.png}
+  preview={rwr_pic.png},
+  arxiv={2107.09088}
 }
 
 @article{sajid2022bayesian,
@@ -210,7 +222,8 @@ @article{sajid2022bayesian
   code={https://github.com/ucbtns/renyibounds},
   bibtex_show={true},
   poster={Renyibound_poster.pdf},
-  preview={renyi_pic.png}
+  preview={renyi_pic.png},
+  arxiv={2107.05438}
 }
 
 @article{faccio2022general,
@@ -223,7 +236,8 @@ @article{faccio2022general
   code={https://github.com/IDSIA/policyevaluator},
   bibtex_show={true},
   poster={evaluator_poster.pdf},
-  preview={evaluator_pic.png}
+  preview={evaluator_pic.png},
+  arxiv={2207.01566}
 }
 
 @article{vstrupl2022upside,
@@ -236,7 +250,8 @@ @article{vstrupl2022upside
   code={https://github.com/struplm/UDRL-GCSL-counterexample},
   bibtex_show={true},
   poster={udrl_poster.pdf},
-  preview={udrl_pic.png}
+  preview={udrl_pic.png},
+  arxiv={2205.06595}
 }
 
 #2021
@@ -252,7 +267,8 @@ @inproceedings{faccioparameter
   bibtex_show={true},
   poster={poster_pvf.pdf},
   slides={slides_pvf.pdf},
-  preview={pvf_pic.png}
+  preview={pvf_pic.png},
+  arxiv={2006.09226}
 }
 
 #2018
@@ -270,5 +286,6 @@ @article{metelli2018policy
   poster={poster_POIS.pdf},
   slides={slides_POIS.pdf},
   preview={pois_pic.png},
-  selected={true}
+  selected={true},
+  arxiv={1809.06098}
 }
diff --git a/assets/pdf/learning_rnn_slides.pdf b/assets/pdf/learning_rnn_slides.pdf