<script src="http://www.google.com/jsapi" type="text/javascript"></script>
<script type="text/javascript">google.load("jquery", "1.3.2");</script>
<style type="text/css">
body {
font-family: "HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
font-weight:300;
font-size:18px;
margin-left: auto;
margin-right: auto;
width: 1100px;
}
h1 {
font-size:32px;
font-weight:300;
}
.disclaimerbox {
background-color: #eee;
border: 1px solid #eeeeee;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
padding: 20px;
}
video.header-vid {
height: 140px;
border: 1px solid black;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
}
img.header-img {
height: 140px;
border: 1px solid black;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
}
img.rounded {
border: 1px solid #eeeeee;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
}
a:link,a:visited
{
color: #1367a7;
text-decoration: none;
}
a:hover {
color: #208799;
}
td.dl-link {
height: 160px;
text-align: center;
font-size: 22px;
}
.layered-paper-big { /* modified from: http://css-tricks.com/snippets/css/layered-paper/ */
box-shadow:
0px 0px 1px 1px rgba(0,0,0,0.35), /* The top layer shadow */
5px 5px 0 0px #fff, /* The second layer */
5px 5px 1px 1px rgba(0,0,0,0.35), /* The second layer shadow */
10px 10px 0 0px #fff, /* The third layer */
10px 10px 1px 1px rgba(0,0,0,0.35), /* The third layer shadow */
15px 15px 0 0px #fff, /* The fourth layer */
15px 15px 1px 1px rgba(0,0,0,0.35), /* The fourth layer shadow */
20px 20px 0 0px #fff, /* The fifth layer */
20px 20px 1px 1px rgba(0,0,0,0.35), /* The fifth layer shadow */
25px 25px 0 0px #fff, /* The sixth layer */
25px 25px 1px 1px rgba(0,0,0,0.35); /* The sixth layer shadow */
margin-left: 10px;
margin-right: 45px;
}
.paper-big { /* modified from: http://css-tricks.com/snippets/css/layered-paper/ */
box-shadow:
0px 0px 1px 1px rgba(0,0,0,0.35); /* The top layer shadow */
margin-left: 10px;
margin-right: 45px;
}
.layered-paper { /* modified from: http://css-tricks.com/snippets/css/layered-paper/ */
box-shadow:
0px 0px 1px 1px rgba(0,0,0,0.35), /* The top layer shadow */
5px 5px 0 0px #fff, /* The second layer */
5px 5px 1px 1px rgba(0,0,0,0.35), /* The second layer shadow */
10px 10px 0 0px #fff, /* The third layer */
10px 10px 1px 1px rgba(0,0,0,0.35); /* The third layer shadow */
margin-top: 5px;
margin-left: 10px;
margin-right: 30px;
margin-bottom: 5px;
}
.vert-cent {
position: relative;
top: 50%;
transform: translateY(-50%);
}
hr
{
border: 0;
height: 1px;
background-image: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0));
}
</style>
<html>
<head>
<title>ASAPNet</title>
<meta property="og:image" content=""/> <!-- Facebook automatically scrapes this. Go to https://developers.facebook.com/tools/debug/ if you update and want to force Facebook to rescrape. -->
<meta property="og:title" content="Spatially-Adaptive Pixelwise Networks for Fast Image Translation" />
<meta property="og:description" content="T. Rott Shaham et al., CVPR 2021." />
<!-- Get from Google Analytics -->
<!-- Global site tag (gtag.js) - Google Analytics
<script async src=""></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-75863369-6');
</script>
-->
</head>
<body>
<br>
<center>
<span style="font-size:36px">Spatially-Adaptive Pixelwise Networks for Fast Image Translation</span><br>
<span style="font-size:25px;line-height:2.0">CVPR 2021</span><br>
<table align=center width=1100px>
<tr>
<td align=center width=110px>
<center>
<span style="font-size:24px"><a href="https://tamarott.github.io/">Tamar Rott Shaham</a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:24px"><a href="http://www.mgharbi.com">Michaël Gharbi</a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:24px"><a href="https://richzhang.github.io/">Richard Zhang</a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:24px"><a href="https://research.adobe.com/person/eli-shechtman/"> Eli Shechtman </a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:24px"><a href="https://tomer.net.technion.ac.il/">Tomer Michaeli</a></span>
</center>
</td>
</tr>
</table>
<table align=center width=500px>
<tr>
<td align=center width=200px>
<center>
<span style="font-size:24px"><a href="https://arxiv.org/pdf/2012.02992.pdf">[Paper]</a></span>
</center>
</td>
<td align=center width=200px>
<center>
<span style="font-size:24px"><a href='https://github.com/tamarott/ASAPNet'>[GitHub]</a></span><br>
</center>
</td>
<td align=center width=200px>
<center>
<span style="font-size:24px"><a href="https://youtu.be/6-OfZ32CoBE">[Video]</a></span>
</center>
</td>
</tr>
</table>
</center>
<center>
<table align=center width=850px>
<tr>
<td width=260px>
<center>
<img class="round" style="width:400px" src="./resources/runtime_vs_imgsize_Mpix_v15.png"/>
</center>
</td>
</tr>
</table>
<br>
<table align=center width=850px>
<tr align=justify>
<td>
Our novel model, A Spatially-Adaptive Pixelwise Network (ASAPNet), generates high-resolution images
at significantly lower runtimes than existing methods while maintaining high visual quality. In particular, as the plot shows, our
model is 2-18x faster than the baselines, depending on resolution.
</td>
</tr>
</table>
</center>
<hr>
<table align=center width=850px>
<center><h1>Abstract</h1></center>
<tr align=justify>
<td>
We introduce a new generator architecture, aimed at fast and efficient high-resolution image-to-image translation.
We design the generator to be an extremely lightweight function of the full-resolution image. In fact, we use <b>pixel-wise networks</b>;
that is, each pixel is processed independently of others, through a composition of simple affine transformations and nonlinearities.
We take three important steps to equip such a seemingly simple function with adequate expressivity.
First, the parameters of the pixel-wise networks are <b>spatially varying</b>, so they can represent a broader function
class than simple 1x1 convolutions. Second, these parameters are <b>predicted</b> by a fast convolutional network that processes an
aggressively low-resolution representation of the input. Third, we augment the input image by concatenating a sinusoidal encoding of
spatial coordinates, which provides an effective inductive bias for generating realistic novel high-frequency image content.
As a result, our model is up to 18x faster than state-of-the-art baselines. We achieve this speedup while generating comparable
visual quality across different image resolutions and translation domains.
</td>
</tr>
</table>
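<table align=center width=850px>
<tr align=justify>
<td>
The snippet below is a minimal sketch (ours, not the official ASAPNet code) of the sinusoidal encoding of spatial coordinates mentioned above:
pixel coordinates are mapped through sines and cosines at a few frequencies and concatenated to the input image as extra channels.
The function name and the frequency schedule are illustrative assumptions.
<pre>
# Minimal sketch (not the official implementation): concatenate a sinusoidal
# encoding of pixel coordinates to the input image.
import torch

def add_positional_encoding(x, num_freqs=4):
    """x: (B, C, H, W) input image. Returns (B, C + 4*num_freqs, H, W)."""
    b, _, h, w = x.shape
    ys = torch.linspace(-1.0, 1.0, h, device=x.device)
    xs = torch.linspace(-1.0, 1.0, w, device=x.device)
    grid_y, grid_x = torch.meshgrid(ys, xs, indexing="ij")
    feats = []
    for k in range(num_freqs):            # frequency schedule is illustrative
        freq = (2.0 ** k) * torch.pi
        for grid in (grid_x, grid_y):
            feats.append(torch.sin(freq * grid))
            feats.append(torch.cos(freq * grid))
    enc = torch.stack(feats, dim=0).unsqueeze(0).expand(b, -1, -1, -1)
    return torch.cat([x, enc], dim=1)
</pre>
Concatenating these coordinate channels gives each pixelwise network direct access to its location, which the abstract identifies as an
effective inductive bias for generating realistic high-frequency content.
</td>
</tr>
</table>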
<br>
<table align=center width=850px>
<tr>
<td width=850px>
<center>
<img class="round" style="width:700px" src="./resources/res.png"/>
</center>
</td>
</tr>
</table>
<hr>
<center><h1>5-Minute Video</h1></center>
<table align=center width=800px>
<tr>
<td width=1080 colspan=7 valign=middle align=center style='width:802.5pt;padding:10pt 5.4pt 20pt 5.4pt'>
<iframe width="560" height="315" src="https://www.youtube.com/embed/6-OfZ32CoBE" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
</td>
</tr>
</table>
<br>
<hr>
<center><h1>Implementation</h1></center>
<table align=center width=800px>
<tr>
<td align=center>
<span style="font-size:24px"><a href='https://github.com/tamarott/ASAPNet'>[GitHub]</a></span>
</td>
</tr>
</table>
<br>
<table align=center width=850px>
<tr>
<td width=850px>
<center>
<img class="round" style="width:700px" src="./resources/architecture_v8.png"/>
</center>
</td>
</tr>
</table>
<table align=center width=850px>
<tr align=justify>
<td>
Our model first processes the input at a very low resolution, x<sub>l</sub>, to produce a tensor of weights and
biases, φ<sub>p</sub>. These are upsampled back to full resolution, where they parameterize pixelwise, spatially-varying MLPs
f<sub>p</sub> that compute the final output y from the high-resolution input x.
</td>
</tr>
</table>
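<table align=center width=850px>
<tr align=justify>
<td>
For intuition, here is a minimal sketch (ours, not the released code) of the pixelwise stage: the low-resolution parameter tensor φ<sub>p</sub>
is upsampled to full resolution and split into per-pixel weights and biases that define a small two-layer MLP applied independently at every pixel.
The shapes, the upsampling mode, and the two-layer depth are illustrative assumptions.
<pre>
# Minimal sketch (not the official implementation): spatially-varying pixelwise MLPs
# whose parameters are predicted at low resolution and upsampled to full resolution.
import torch
import torch.nn.functional as F

def pixelwise_mlp(x_hr, params_lr, c_in, c_hidden, c_out):
    """x_hr: (B, c_in, H, W) full-res input (image + positional encoding).
    params_lr: (B, P, h, w) low-res weights and biases, with
    P = c_in*c_hidden + c_hidden + c_hidden*c_out + c_out."""
    b, _, h, w = x_hr.shape
    # Upsample the parameter tensor to full resolution (nearest is an illustrative choice).
    p = F.interpolate(params_lr, size=(h, w), mode="nearest")
    # Split the channel dimension into the two layers' weights and biases.
    i = 0
    w1 = p[:, i:i + c_in * c_hidden].view(b, c_hidden, c_in, h, w); i += c_in * c_hidden
    b1 = p[:, i:i + c_hidden];                                      i += c_hidden
    w2 = p[:, i:i + c_hidden * c_out].view(b, c_out, c_hidden, h, w); i += c_hidden * c_out
    b2 = p[:, i:i + c_out]
    # Layer 1: per-pixel matrix-vector product followed by a ReLU.
    h1 = torch.relu(torch.einsum("bochw,bchw->bohw", w1, x_hr) + b1)
    # Layer 2: per-pixel affine map producing the output pixels.
    return torch.einsum("bochw,bchw->bohw", w2, h1) + b2
</pre>
Because the convolutional network runs only at low resolution and the full-resolution work amounts to a few per-pixel multiply-adds,
the cost grows slowly with image size, which is the source of the reported speedup.
</td>
</tr>
</table>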
<hr>
<table align=center width=600px>
<center><h1>Paper</h1></center>
<tr align=left>
<td><a href=""><img class="layered-paper-big" style="height:175px" src="./resources/ASAPNet-01.png"/></a></td>
<td><span style="font-size:14pt">T. Rott Shaham, M. Gharbi, R. Zhang, <br>E. Shechtman, T. Michaeli<br>
<b>Spatially-Adaptive Pixelwise Networks for Fast Image Translation</b><br>
CVPR 2021<br>
<!--ArXiv, 2020<br> -->
<a href="https://arxiv.org/pdf/2012.02992.pdf">[ArXiv]</a> <a href="https://openaccess.thecvf.com/content/CVPR2021/html/Shaham_Spatially-Adaptive_Pixelwise_Networks_for_Fast_Image_Translation_CVPR_2021_paper.html">[CVF]</a> <a href="./resources/SM.pdf">[Supplementals]</a> <a href="./resources/bibtex.txt">[Bibtex]</a>
<span style="font-size:4pt"><a href=""><br></a>
</span>
</td>
</tr>
</table>
<br>
<hr>
<table align=center width=900px>
<center><h1>References</h1></center>
<tr align=justify>
<td width=400px>
<left>
Xihui Liu, Guojun Yin, Jing Shao, Xiaogang Wang and Hongsheng Li,
<b>Learning to Predict Layout-to-image Conditional Convolutions for Semantic Image Synthesis,</b>
NeurIPS 2019
<br><br>
Taesung Park, Ming-Yu Liu, Ting-Chun Wang and Jun-Yan Zhu,
<b>Semantic Image Synthesis with Spatially-Adaptive Normalization,</b>
CVPR 2019
<br><br>
Ting-Chun Wang, Ming-Yu Liu, Jun-Yan Zhu, Andrew Tao, Jan Kautz, Bryan Catanzaro,
<b> High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs,</b>
CVPR 2018
<br><br>
Xiaojuan Qi, Qifeng Chen, Jiaya Jia, and Vladlen Koltun,
<b>Semi-parametric Image Synthesis,</b>
CVPR 2018
<br><br>
Qifeng Chen and Vladlen Koltun,
<b>Photographic Image Synthesis with Cascaded Refinement Networks,</b>
ICCV 2017
</left>
</td>
</tr>
</table>
<hr>
<br>
<table align=center width=900px>
<tr>
<td width=400px>
<left>
This template was originally made by <a href="http://web.mit.edu/phillipi/">Phillip Isola</a> and <a href="http://richzhang.github.io/">Richard Zhang</a> for a <a href="http://richzhang.github.io/colorization/">colorful</a> ECCV project; the code can be found <a href="https://github.com/richzhang/webpage-template">here</a>.
</left>
</td>
</tr>
</table>
<br>
</body>
</html>