Skip to content

Commit

Permalink
Add ArXiV
Browse files Browse the repository at this point in the history
  • Loading branch information
FF93 committed Aug 6, 2024
1 parent ce6c0b0 commit fb36470
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 18 deletions.
53 changes: 35 additions & 18 deletions _bibliography/papers.bib
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ @inproceedings{herrmannlearning
slides={learning_rnn_slides.pdf},
preview={learning_rnn_pic.png},
selected={true},
arxiv={https://arxiv.org/abs/2403.11998}
arxiv={2403.11998}
}

@inproceedings{zhuge2024language,
Expand All @@ -39,7 +39,8 @@ @inproceedings{zhuge2024language
poster={gptswarm-poster.pdf},
slides={GPTSwarm_slides.pdf},
preview={gptswarm_pic.png},
selected={true}
selected={true},
arxiv={2402.16823}
}

@article{wang2024scaling,
Expand All @@ -50,7 +51,8 @@ @article{wang2024scaling
abbr={Preprint},
abstract={The Value Iteration Network (VIN) is an end-to-end differentiable architecture that performs value iteration on a latent MDP for planning in reinforcement learning (RL). However, VINs struggle to scale to long-term and large-scale planning tasks, such as navigating a 100 x 100 maze -- a task which typically requires thousands of planning steps to solve. We observe that this deficiency is due to two issues: the representation capacity of the latent MDP and the planning module's depth. We address these by augmenting the latent MDP with a dynamic transition kernel, dramatically improving its representational capacity, and, to mitigate the vanishing gradient problem, introducing an "adaptive highway loss" that constructs skip connections to improve gradient flow. We evaluate our method on both 2D maze navigation environments and the ViZDoom 3D navigation benchmark. We find that our new method, named Dynamic Transition VIN (DT-VIN), easily scales to 5000 layers and casually solves challenging versions of the above tasks. Altogether, we believe that DT-VIN represents a concrete step forward in performing long-term large-scale planning in RL environments.},
bibtex_show={true},
preview={scaling5000_pic.png}
preview={scaling5000_pic.png},
arxiv={2406.08404}
}

@article{alhakami2024towards,
Expand All @@ -61,7 +63,8 @@ @article{alhakami2024towards
abbr={Preprint},
abstract={Artificial intelligence has made great strides in many areas lately, yet it has had comparatively little success in general-use robotics. We believe one of the reasons for this is the disconnect between traditional robotic design and the properties needed for open-ended, creativity-based AI systems. To that end, we, taking selective inspiration from nature, build a robust, partially soft robotic limb with a large action space, rich sensory data stream from multiple cameras, and the ability to connect with others to enhance the action space and data stream. As a proof of concept, we train two contemporary machine learning algorithms to perform a simple target-finding task. Altogether, we believe that this design serves as a first step to building a robot tailor-made for achieving artificial general intelligence.},
bibtex_show={true},
preview={robot_pic.png}
preview={robot_pic.png},
arxiv={2404.08093}
}

@article{liu2024faster,
Expand All @@ -73,7 +76,8 @@ @article{liu2024faster
abstract={We explore the role of attention mechanism during inference in text-conditional diffusion models. Empirical observations suggest that cross-attention outputs converge to a fixed point after several inference steps. The convergence time naturally divides the entire inference process into two phases: an initial phase for planning text-oriented visual semantics, which are then translated into images in a subsequent fidelity-improving phase. Cross-attention is essential in the initial phase but almost irrelevant thereafter. However, self-attention initially plays a minor role but becomes crucial in the second phase. These findings yield a simple and training-free method known as temporally gating the attention (TGATE), which efficiently generates images by caching and reusing attention outputs at scheduled time steps. Experimental results show when widely applied to various existing text-conditional diffusion models, TGATE accelerates these models by 10%-50%.},
code={https://github.com/HaozheLiu-ST/T-GATE},
bibtex_show={true},
preview={tgate_pic.png}
preview={tgate_pic.png},
arxiv={2404.02747}
}

@inproceedings{wanghighway,
Expand All @@ -86,7 +90,8 @@ @inproceedings{wanghighway
code={https://github.com/wangyuhuix/HighwayVIN},
bibtex_show={true},
poster={poster_highway_vin.pdf},
preview={highway_vin_pic.png}
preview={highway_vin_pic.png},
arxiv={2406.03485}
}

@article{wang2024highway,
Expand All @@ -97,7 +102,8 @@ @article{wang2024highway
abbr={Preprint},
abstract={Learning from multi-step off-policy data collected by a set of policies is a core problem of reinforcement learning (RL). Approaches based on importance sampling (IS) often suffer from large variances due to products of IS ratios. Typical IS-free methods, such as n-step Q-learning, look ahead for n time steps along the trajectory of actions (where n is called the lookahead depth) and utilize off-policy data directly without any additional adjustment. They work well for proper choices of n. We show, however, that such IS-free methods underestimate the optimal value function (VF), especially for large n, restricting their capacity to efficiently utilize information from distant future time steps. To overcome this problem, we introduce a novel, IS-free, multi-step off-policy method that avoids the underestimation issue and converges to the optimal VF. At its core lies a simple but non-trivial highway gate, which controls the information flow from the distant future by comparing it to a threshold. The highway gate guarantees convergence to the optimal VF for arbitrary n and arbitrary behavioral policies. It gives rise to a novel family of off-policy RL algorithms that safely learn even when n is very large, facilitating rapid credit assignment from the far future to the past. On tasks with greatly delayed rewards, including video games where the reward is given only at the end of the game, our new methods outperform many existing multi-step off-policy algorithms.},
bibtex_show={true},
preview={highway_pic.png}
preview={highway_pic.png},
arxiv={2405.18289}
}

#2023
Expand All @@ -117,7 +123,8 @@ @inproceedings{faccio2023goal
poster={gogepo_poster.pdf},
preview={gogepo_pic.png},
slides={gogepo_slides.pdf},
selected={true}
selected={true},
arxiv={2207.01570}
}

@article{zhuge2023mindstorms,
Expand All @@ -132,7 +139,8 @@ @article{zhuge2023mindstorms
poster={nlsom_poster.pdf},
slides={nlsom_slides.pdf},
preview={nlsom_pic.png},
selected={true}
selected={true},
arxiv={2305.17066}
}


Expand All @@ -147,7 +155,8 @@ @inproceedings{liu2023learning
code={https://github.com/AI-Initiative-KAUST/VideoRLCS},
bibtex_show={true},
poster={},
preview={critical_states_pic.png}
preview={critical_states_pic.png},
arxiv={2308.07795}
}

@article{stanic2023languini,
Expand All @@ -159,7 +168,8 @@ @article{stanic2023languini
abstract={The Languini Kitchen serves as both a research collective and codebase designed to empower researchers with limited computational resources to contribute meaningfully to the field of language modelling. We introduce an experimental protocol that enables model comparisons based on equivalent compute, measured in accelerator hours. The number of tokens on which a model is trained is defined by the model's throughput and the chosen compute class. Notably, this approach avoids constraints on critical hyperparameters which affect total parameters or floating-point operations. For evaluation, we pre-process an existing large, diverse, and high-quality dataset of books that surpasses existing academic benchmarks in quality, diversity, and document length. On it, we compare methods based on their empirical scaling trends which are estimated through experiments at various levels of compute. This work also provides two baseline models: a feed-forward model derived from the GPT-2 architecture and a recurrent model in the form of a novel LSTM with ten-fold throughput. While the GPT baseline achieves better perplexity throughout all our levels of compute, our LSTM baseline exhibits a predictable and more favourable scaling law. This is due to the improved throughput and the need for fewer training tokens to achieve the same decrease in test perplexity. Extrapolating the scaling laws of both models results in an intersection at roughly 50,000 accelerator hours. We hope this work can serve as the foundation for meaningful and reproducible language modelling research.},
code={https://github.com/languini-kitchen/languini-kitchen},
bibtex_show={true},
preview={languini_pic.png}
preview={languini_pic.png},
arxiv={2309.11197}
}

#2022
Expand All @@ -177,7 +187,8 @@ @article{irie2022neural
bibtex_show={true},
poster={poster_neural_ode.pdf},
slides={slides_neural_ode.pdf},
preview={neural_fwp_pic.png}
preview={neural_fwp_pic.png},
arxiv={2206.01649}
}

@inproceedings{vstrupl2022reward,
Expand All @@ -193,7 +204,8 @@ @inproceedings{vstrupl2022reward
code={https://github.com/dylanashley/reward-weighted-regression},
bibtex_show={true},
poster={rwr_poster.pdf},
preview={rwr_pic.png}
preview={rwr_pic.png},
arxiv={2107.09088}
}

@article{sajid2022bayesian,
Expand All @@ -210,7 +222,8 @@ @article{sajid2022bayesian
code={https://github.com/ucbtns/renyibounds},
bibtex_show={true},
poster={Renyibound_poster.pdf},
preview={renyi_pic.png}
preview={renyi_pic.png},
arxiv={2107.05438}
}

@article{faccio2022general,
Expand All @@ -223,7 +236,8 @@ @article{faccio2022general
code={https://github.com/IDSIA/policyevaluator},
bibtex_show={true},
poster={evaluator_poster.pdf},
preview={evaluator_pic.png}
preview={evaluator_pic.png},
arxiv={2207.01566}
}

@article{vstrupl2022upside,
Expand All @@ -236,7 +250,8 @@ @article{vstrupl2022upside
code={https://github.com/struplm/UDRL-GCSL-counterexample},
bibtex_show={true},
poster={udrl_poster.pdf},
preview={udrl_pic.png}
preview={udrl_pic.png},
arxiv={2205.06595}
}

#2021
Expand All @@ -252,7 +267,8 @@ @inproceedings{faccioparameter
bibtex_show={true},
poster={poster_pvf.pdf},
slides={slides_pvf.pdf},
preview={pvf_pic.png}
preview={pvf_pic.png},
arxiv={2006.09226}
}

#2018
Expand All @@ -270,5 +286,6 @@ @article{metelli2018policy
poster={poster_POIS.pdf},
slides={slides_POIS.pdf},
preview={pois_pic.png},
selected={true}
selected={true},
arxiv={1809.06098}
}
Binary file modified assets/pdf/learning_rnn_slides.pdf
Binary file not shown.

0 comments on commit fb36470

Please sign in to comment.