<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>TruX</title>
<meta content="width=device-width, initial-scale=1.0" name="viewport">
<meta content="TruX, TOOS, seminars, software engineering" name="keywords">
<meta content="TruX Open Online Seminars" name="description">
<!-- Favicon -->
<link href="img/favicon.ico" rel="icon">
<!-- Google Font -->
<link href="https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700;800&display=swap" rel="stylesheet">
<!-- CSS Libraries -->
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.10.0/css/all.min.css" rel="stylesheet">
<link href="lib/animate/animate.min.css" rel="stylesheet">
<link href="lib/flaticon/font/flaticon.css" rel="stylesheet">
<link href="lib/owlcarousel/assets/owl.carousel.min.css" rel="stylesheet">
<link href="lib/lightbox/css/lightbox.min.css" rel="stylesheet">
<!-- Template Stylesheet -->
<link href="css/style.css" rel="stylesheet">
</head>
<body>
<!-- Top Bar Start -->
<div class="top-bar d-none d-md-block">
<div class="container-fluid">
<div class="row">
<div class="col-md-8">
<div class="top-bar-left">
<div class="text">
<h2>SnT, University of Luxembourg</h2>
</div>
</div>
</div>
<div class="col-md-4">
<div class="top-bar-right">
</div>
</div>
</div>
</div>
</div>
<!-- Top Bar End -->
<!-- Nav Bar Start -->
<div class="navbar navbar-expand-lg bg-dark navbar-dark">
<div class="container-fluid">
<a href="index.html" class="navbar-brand">T<span>ru</span>X</a>
<button type="button" class="navbar-toggler" data-toggle="collapse" data-target="#navbarCollapse">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse justify-content-between" id="navbarCollapse">
<div class="navbar-nav ml-auto">
<a href="index.html" class="nav-item nav-link active">TOOS</a>
<a href="contact.html" class="nav-item nav-link">Contact</a>
</div>
</div>
</div>
</div>
<!-- Nav Bar End -->
<!-- Page Header Start -->
<div class="page-header">
<div class="container">
<div class="row">
<div class="col-12">
<h2>TruX Open Online Seminars</h2>
</div>
</div>
</div>
</div>
<!-- Page Header End -->
<!-- About Start -->
<div class="about wow fadeInUp" data-wow-delay="0.1s">
<div class="container">
<div class="row align-items-center">
<div class="col-lg-5 col-md-6">
<div class="about-img">
<img src="img/toos.png" alt="TOOS logo">
</div>
</div>
<div class="col-lg-7 col-md-6">
<div class="section-header text-left">
<h2>Welcome to TOOS!</h2>
</div>
<div class="about-text">
<p>
TruX Open Online Seminars serve as a vibrant platform where researchers gather to discuss the latest advancements in software security, software repair, and cutting-edge explainable software techniques.
</p>
<p>
These sessions offer a valuable opportunity for young researchers to share their findings and engage in insightful discussions. Covering a wide range of software-related topics, these seminars provide an important space for experts and enthusiasts to explore new trends, exchange ideas, and create innovative solutions. By bringing together diverse perspectives, these seminars significantly contribute to shaping the future of software technologies, making them more dependable, robust, and effective.
</p>
</div>
</div>
</div>
</div>
</div>
<!-- About End -->
<!-- Prof Start -->
<div class="prof">
<div class="container">
<div class="section-header text-center">
<p>TruX</p>
<h2>Professors</h2>
</div>
<div class="prof-img-frame">
<div class="col-lg-5 col-md-6 col-sm-4 col-xs-4">
<div class="prof-item">
<div class="prof-img">
<a href="https://bissyande.github.io/">
<img src="img/bissyande.jpg" alt="Prof. Dr. Tegawendé Bissyandé">
</a>
</div>
<div class="prof-text">
<h2>Prof. Dr. Tegawendé Bissyandé</h2>
</div>
</div>
</div>
<div class="col-lg-5 col-md-6 col-sm-4 col-xs-4">
<div class="prof-item">
<div class="prof-img">
<a href="https://jacquesklein2302.github.io/">
<img src="img/jacques.jpg" alt="Prof. Dr. Jacques Klein">
</a>
</div>
<div class="prof-text">
<h2>Prof. Dr. Jacques Klein</h2>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Prof End -->
<!-- Upcoming Events Start -->
<div class="col-lg-12 col-md-10 col-sm-10 col-xs-10 upcomingevent wow zoomIn" data-wow-delay="0.1s">
<div class="container">
<div class="frame-title">
<h2>Upcoming Seminars</h2>
</div>
<div class="event-frame">
<div class="event">
<div class="presenter-details">
<img src="img/aashish_yadavally.jpg" alt="Aashish Yadavally">
<h5> Aashish Yadavally </h5>
<p> UT Dallas </p>
</div>
<div class="event-info">
<h3>Contextuality of Code Representation Learning</h3>
<p> Advanced machine learning (ML) models have been successfully leveraged in several software engineering (SE) applications. Existing SE techniques build vectors for
program units using embedding models ranging from static to contextualized ones. Contextualized vectors address a phenomenon in natural-language texts called polysemy,
the coexistence of different meanings of a word or phrase. However, due to their different nature, program units exhibit mixed polysemy: some code tokens and statements
are polysemous, while other tokens (e.g., keywords, separators, and operators) and statements maintain the same meaning in different contexts. A natural question is
whether static or contextualized embeddings fit better with the mixed-polysemy nature of source code. The answer to this question helps SE researchers select the right
embedding model. We conducted experiments on 12 popular sequence-, tree-, and graph-based embedding models and on samples of a dataset of 10,222 Java projects with 14M+
methods. We present several contextuality evaluation metrics, adapted from natural-language texts to code structures, to evaluate the embeddings from those models.
Among several findings, we found that models with higher contextuality help a bug detection model perform better than static ones. Neither static nor contextualized
embedding models fit well with the mixed-polysemy nature of source code. Thus, we developed Hycode, a hybrid embedding model that fits better with the nature of mixed
polysemy in source code. </p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black"><strong>Monday, January 15, 2024 at 3:00 PM CET</strong></span></p>
</div>
</div>
<div class="event">
<div class="presenter-details">
<img src="img/Xin-Cheng_Wen.jpg" alt="Xin-Cheng Wen">
<h5> Xin-Cheng Wen </h5>
<p> HIT </p>
</div>
<div class="event-info">
<h3>When Less is Enough: Positive and Unlabeled Learning Model for Vulnerability Detection</h3>
<p> Automated code vulnerability detection has gained increasing attention in recent years. Deep learning (DL)-based methods, which implicitly learn vulnerable
code patterns, have proven effective in vulnerability detection. The performance of DL-based methods usually relies on the quantity and quality of labeled data.
However, current labeled data are generally collected automatically, e.g., crawled from human-generated commits, making it hard to ensure the quality of the labels.
Prior studies have demonstrated that non-vulnerable code (i.e., negative labels) tends to be unreliable in commonly used datasets, while vulnerable code
(i.e., positive labels) is labeled more reliably. Given the large amount of unlabeled data available in practice, it is necessary and worthwhile to leverage the
positive data together with the unlabeled data for more accurate vulnerability detection. In this paper, we focus on the Positive and Unlabeled (PU) learning problem
for vulnerability detection and propose a novel model named PILOT, i.e., Positive and unlabeled Learning mOdel for vulnerability deTection. PILOT learns only from
positive and unlabeled data for vulnerability detection. It mainly contains two modules: (1) a distance-aware label selection module that generates pseudo-labels for
selected unlabeled data, involving an inter-class distance prototype and progressive fine-tuning; (2) a mixed-supervision representation learning module that further
alleviates the influence of noise and enhances the discrimination of representations. The experimental results show that PILOT outperforms
popular weakly supervised methods by 2.78%-18.93% in the PU learning setting. Compared with state-of-the-art methods, PILOT also improves the F1 score by 1.34%-12.46% in
the supervised setting. In addition, based on manual checking, PILOT identified 23 mislabeled samples in the FFmpeg+Qemu dataset in the PU learning setting. </p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black"><strong>Monday, January 29, 2024 at 10:30 AM CET</strong></span></p>
</div>
</div>
</div>
</div>
</div>
<!-- Upcoming Events End -->
<!-- Past Events Start -->
<div class="pastevent wow zoomIn" data-wow-delay="0.1s">
<div class="container">
<div class="col-lg-12 col-md-10">
<div class="frame-title">
<h2>Past Seminars</h2>
</div>
<div class="event-frame">
<ul class="speech-list">
<li>
<div class="speech-header">
<b>Robust Learning from Noisy and Imbalanced Software Engineering Datasets</b>, Monday, December 18, 2023, by <b>Zhong Li</b> from <b>NJU</b>
</div>
<div class="details">
<div class="event">
<div class="presenter-details">
<img src="img/ZhongLi_nanjing_university.jpg" alt="Zhong Li">
<h5> Zhong Li</h5>
<p> Nanjing University </p>
</div>
<div class="event-info">
<h3>Robust Learning of Deep Predictive Models from Noisy and Imbalanced Software Engineering Datasets</h3>
<p> With the rapid development of deep learning, deep predictive models have been widely applied to improve software engineering tasks, such as defect
prediction and issue classification, and have achieved remarkable success. They are mostly trained in a supervised manner, which heavily relies on
high-quality datasets. Unfortunately, due to the nature and source of software engineering data, real-world datasets often suffer from sample
mislabelling and class imbalance, thus undermining the effectiveness of deep predictive models in practice. This problem has become a major obstacle
for deep learning-based software engineering. In this paper, we propose RobustTrainer, the first approach to learning deep predictive models on raw
training datasets where mislabelled samples and imbalanced classes coexist. RobustTrainer consists of a two-stage training scheme: the first stage
learns feature representations robust to sample mislabelling, and the second builds a classifier robust to class imbalance based on the representations
learned in the first stage. We apply RobustTrainer to two popular software engineering tasks, i.e., Bug Report Classification and Software Defect
Prediction. Evaluation results show that RobustTrainer effectively tackles the mislabelling and class imbalance issues and produces significantly better
deep predictive models than the other six comparison approaches. </p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black">Monday, December 18, 2023 at 10:30 AM CET</span></p>
</div>
</div>
</div>
</li>
<li>
<div class="speech-header">
<b>Dataflow Analysis-Inspired DL for Efficient Vulnerability Detection</b>, Monday, December 4, 2023, by <b>Benjamin Steenhoek</b> from <b>ISU</b>
</div>
<div class="details">
<div class="event">
<div class="presenter-details">
<img src="img/benjamin.jpg" alt="Benjamin Steenhoek">
<h5> Benjamin Steenhoek</h5>
<p> Iowa State University </p>
</div>
<div class="event-info">
<h3>Dataflow Analysis-Inspired Deep Learning for Efficient Vulnerability Detection</h3>
<p> Deep learning-based vulnerability detection has shown great performance and, in some studies, has outperformed static analysis tools. However, the
highest-performing approaches use token-based transformer models, which are not the most efficient at capturing the code semantics required for
vulnerability detection. In this paper, we propose to combine such causal-based vulnerability detection algorithms with deep learning, aiming to achieve
more efficient and effective vulnerability detection. Specifically, we designed DeepDFA, a dataflow analysis-inspired graph learning framework, together
with an embedding technique that enables graph learning to simulate dataflow computation. We show that DeepDFA is both performant and efficient. DeepDFA
outperformed all non-transformer baselines. It was trained in 9 minutes, 75x faster than the highest-performing baseline model. When using only 50+
vulnerable examples and a few hundred total examples as training data, the model retained the same performance as when trained on 100% of the dataset.
DeepDFA also generalized to real-world vulnerabilities in DbgBench; it detected 8.7 out of 17 vulnerabilities on average across folds and was able to
distinguish between patched and buggy versions. By combining DeepDFA with a large language model, we surpassed the state-of-the-art vulnerability
detection performance on the Big-Vul dataset with an F1 score of 96.46, precision of 97.82, and recall of 95.14. </p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black">Monday, December 4, 2023 at 3:00 PM CET</span></p>
</div>
</div>
</div>
</li>
<li>
<div class="speech-header">
<b>Towards Understanding Fairness and its Composition in Ensemble ML</b>, Monday, November 20, 2023, by <b>Usman Gohar</b> from <b>ISU</b>
</div>
<div class="details">
<div class="event">
<div class="presenter-details">
<img src="img/gohar.jpg" alt="Usman Gohar">
<h5> Usman Gohar</h5>
<p> Iowa State University </p>
</div>
<div class="event-info">
<h3>Towards Understanding Fairness and its Composition in Ensemble Machine Learning</h3>
<p> Machine Learning (ML) software has been widely adopted in modern society, with reported fairness implications for minority groups based on race, sex, age, etc.
Many recent works have proposed methods to measure and mitigate algorithmic bias in ML models. The existing approaches focus on single classifier-based ML models.
However, real-world ML models are often composed of multiple independent or dependent learners in an ensemble (e.g., Random Forest), where the fairness composes in a non-trivial way.
How does fairness compose in ensembles? What are the fairness impacts of the learners on the ultimate fairness of the ensemble? Can fair learners result in an unfair ensemble?
Furthermore, studies have shown that hyperparameters influence the fairness of ML models. Ensemble hyperparameters are more complex since they affect how learners are combined in
different categories of ensembles. In this paper, we comprehensively study popular real-world ensembles: Bagging, Boosting, Stacking, and Voting. We have developed a benchmark
of 168 ensemble models collected from Kaggle on four popular fairness datasets. We use existing fairness metrics to understand the composition of fairness. Our results show that
ensembles can be designed to be fairer without using mitigation techniques. We also identify the interplay between fairness composition and data characteristics to guide fair
ensemble design.</p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black">Monday, November 20, 2023 at 4:00 PM CET</span></p>
</div>
</div>
</div>
</li>
<li>
<div class="speech-header">
<b>ITER: Iterative Neural Repair for Multi-Location Patches</b>, Monday, November 6, 2023, by <b>He Ye</b> from <b>CMU</b>
</div>
<div class="details">
<div class="event">
<div class="presenter-details">
<img src="img/ye_he.png" alt="He Ye">
<h5> He Ye</h5>
<p> Carnegie Mellon University</p>
</div>
<div class="event-info">
<h3>ITER: Iterative Neural Repair for Multi-Location Patches</h3>
<p>Automated program repair (APR) has achieved promising results, especially using neural networks. Yet, the overwhelming majority of patches produced
by APR tools are confined to a single location. When looking at the patches produced with neural repair, most of them fail to compile, while a few
uncompilable ones go in the right direction. In both cases, the fundamental problem is that the potential of partial patches is ignored. In this paper,
we propose an iterative program repair paradigm called ITER, founded on the concept of improving partial patches until they become plausible and correct.
First, ITER iteratively improves partial single-location patches by fixing compilation errors and further refining the previously generated code.
Second, ITER iteratively improves partial patches to construct multi-location patches, with fault localization re-execution. ITER is implemented for
Java based on battle-proven deep neural networks and code representation. ITER is evaluated on 476 bugs from 10 open-source projects in Defects4J 2.0.
It succeeds in repairing 76 of them, including 15 multi-location bugs, which is a new frontier in the field.</p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black"> Monday, November 6, 2023 at 3:00 PM CET </span></p>
</div>
</div>
</div>
</li>
<li>
<div class="speech-header">
<b>Guided Malware Sample Analysis Based on Graph Neural Networks</b>, Monday, October 30, 2023, by <b>Yi-Hsien Chen</b> from <b>NTU</b>
</div>
<div class="details">
<div class="event">
<div class="presenter-details">
<img src="img/yi-hsien.jpg" alt="Yi-Hsien Chen">
<h5> Yi-Hsien Chen </h5>
<p> National Taiwan University </p>
</div>
<div class="event-info">
<h3>Guided Malware Sample Analysis Based on Graph Neural Networks</h3>
<p> Malicious binaries have caused data and monetary loss to people, and these binaries keep evolving rapidly nowadays. While manual analysis is slow
and ineffective, automated malware report generation is a long-term goal for malware analysts and researchers. This study moves one step toward that goal
by identifying essential functions in malicious binaries to accelerate, and even automate, the analysis process. We design and implement MalwareExpert,
an expert system based on our proposed graph neural network. The system pinpoints the essential functions of an analyzed sample and visualizes the
relationships between the involved parts. The evaluation results show that our approach has competitive detection performance (97.3% accuracy and 96.5% recall)
compared to existing malware detection models. Moreover, it gives an intuitive and easy-to-understand explanation of the model predictions by visualizing
and correlating essential functions. We compare the essential functions identified by our system against several expert-made malware analysis reports
from multiple sources. Our qualitative and quantitative analyses show that the pinpointed functions indicate accurate directions. In the best case, the top 2%
of functions reported by the system can cover all expert-annotated functions in three steps. We believe that the MalwareExpert system has shed light on
automated program behavior analysis.</p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black"> Monday, October 30, 2023 at 10:30 AM CET </span></p>
</div>
</div>
</div>
</li>
<li>
<div class="speech-header">
<b>Rete: Learning Namespace Representation for Program Repair</b>, Monday, October 9, 2023, by <b>Nikhil Parasaram</b> from <b>UCL</b>
</div>
<div class="details">
<div class="event">
<div class="presenter-details">
<img src="img/nikhil.jpg" alt="Nikhil Parasaram">
<h5> Nikhil Parasaram</h5>
<p> University College London</p>
</div>
<div class="event-info">
<h3>Rete: Learning Namespace Representation for Program Repair</h3>
<p>A key challenge of automated program repair is finding correct patches in the vast search space of candidate patches. Real-world programs define
large namespaces of variables that contribute considerably to the search space explosion. Existing program repair approaches neglect information about
the program namespace, which makes them inefficient and increases the chance of test overfitting. We propose Rete, a new program repair technique that
learns project-independent information about program namespaces and uses it to navigate the search space of patches. Rete uses a neural network to extract
project-independent information about variable CDU chains, i.e., def-use chains augmented with control flow. It then ranks patches by jointly ranking variables
and the patch templates into which the variables are inserted. We evaluated Rete on 142 bugs extracted from two datasets, ManyBugs and BugsInPy. Our
experiments demonstrate that Rete generates six new correct patches that fix bugs that previous tools did not repair, an improvement of 31% and 59% over
the existing state of the art.</p>
<p><b><span class="black-underligned">Presentation Date:</span></b> <span class="black"> Monday, October 9, 2023 at 10:30 AM CEST </span></p>
</div>
</div>
</div>
</li>
<!-- Add more speech items as needed -->
</ul>
</div>
</div>
</div>
</div>
<!-- Past Events End -->
<script src="js/script.js"></script>
<!-- Footer Start -->
<div class="footer wow fadeIn" data-wow-delay="0.3s">
<div class="container-fluid">
<div class="container">
<div class="footer-info">
<a href="index.html" class="footer-logo">T<span>ru</span>X</a>
</div>
</div>
</div>
</div>
<!-- Footer End -->
<a href="#" class="back-to-top"><i class="fa fa-chevron-up"></i></a>
<!-- JavaScript Libraries -->
<script src="https://code.jquery.com/jquery-3.4.1.min.js"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/js/bootstrap.bundle.min.js"></script>
<script src="lib/easing/easing.min.js"></script>
<script src="lib/wow/wow.min.js"></script>
<script src="lib/owlcarousel/owl.carousel.min.js"></script>
<script src="lib/isotope/isotope.pkgd.min.js"></script>
<script src="lib/lightbox/js/lightbox.min.js"></script>
<!-- Contact Javascript File -->
<script src="mail/jqBootstrapValidation.min.js"></script>
<script src="mail/contact.js"></script>
</body>
</html>