From 1fab36d3018049bfcd23170cc61d994ad5daac15 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 2 Jul 2016 12:11:44 -0700 Subject: [PATCH] Add libsyntax_pos from 1.11.0-nightly (01411937f 2016-07-01) --- syntex_pos/src/Cargo.toml | 12 + syntex_pos/src/lib.rs | 667 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 679 insertions(+) create mode 100644 syntex_pos/src/Cargo.toml create mode 100644 syntex_pos/src/lib.rs diff --git a/syntex_pos/src/Cargo.toml b/syntex_pos/src/Cargo.toml new file mode 100644 index 00000000..760aaa8a --- /dev/null +++ b/syntex_pos/src/Cargo.toml @@ -0,0 +1,12 @@ +[package] +authors = ["The Rust Project Developers"] +name = "syntax_pos" +version = "0.0.0" + +[lib] +name = "syntax_pos" +path = "lib.rs" +crate-type = ["dylib"] + +[dependencies] +serialize = { path = "../libserialize" } diff --git a/syntex_pos/src/lib.rs b/syntex_pos/src/lib.rs new file mode 100644 index 00000000..39bb5956 --- /dev/null +++ b/syntex_pos/src/lib.rs @@ -0,0 +1,667 @@ +// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! The source positions and related helper functions +//! +//! # Note +//! +//! This API is completely unstable and subject to change. 
+ +#![crate_name = "syntax_pos"] +#![unstable(feature = "rustc_private", issue = "27812")] +#![crate_type = "dylib"] +#![crate_type = "rlib"] +#![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", + html_favicon_url = "https://doc.rust-lang.org/favicon.ico", + html_root_url = "https://doc.rust-lang.org/nightly/")] +#![cfg_attr(not(stage0), deny(warnings))] + +#![feature(custom_attribute)] +#![allow(unused_attributes)] +#![feature(rustc_private)] +#![feature(staged_api)] +#![feature(question_mark)] + +use std::cell::{Cell, RefCell}; +use std::ops::{Add, Sub}; +use std::rc::Rc; +use std::cmp; + +use std::fmt; + +use serialize::{Encodable, Decodable, Encoder, Decoder}; + +extern crate serialize; +extern crate serialize as rustc_serialize; // used by deriving + +pub type FileName = String; + +/// Spans represent a region of code, used for error reporting. Positions in spans +/// are *absolute* positions from the beginning of the codemap, not positions +/// relative to FileMaps. Methods on the CodeMap can be used to relate spans back +/// to the original source. +/// You must be careful if the span crosses more than one file - you will not be +/// able to use many of the functions on spans in codemap and you cannot assume +/// that the length of the span = hi - lo; there may be space in the BytePos +/// range between files. +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub struct Span { + pub lo: BytePos, + pub hi: BytePos, + /// Information about where the macro came from, if this piece of + /// code was created by a macro expansion. + pub expn_id: ExpnId +} + +/// A collection of spans. Spans have two orthogonal attributes: +/// +/// - they can be *primary spans*. In this case they are the locus of +/// the error, and would be rendered with `^^^`. +/// - they can have a *label*. In this case, the label is written next +/// to the mark in the snippet when we render. 
+#[derive(Clone)] +pub struct MultiSpan { + primary_spans: Vec, + span_labels: Vec<(Span, String)>, +} + +impl Span { + /// Returns a new span representing just the end-point of this span + pub fn end_point(self) -> Span { + let lo = cmp::max(self.hi.0 - 1, self.lo.0); + Span { lo: BytePos(lo), hi: self.hi, expn_id: self.expn_id} + } + + /// Returns `self` if `self` is not the dummy span, and `other` otherwise. + pub fn substitute_dummy(self, other: Span) -> Span { + if self.source_equal(&DUMMY_SP) { other } else { self } + } + + pub fn contains(self, other: Span) -> bool { + self.lo <= other.lo && other.hi <= self.hi + } + + /// Return true if the spans are equal with regards to the source text. + /// + /// Use this instead of `==` when either span could be generated code, + /// and you only care that they point to the same bytes of source text. + pub fn source_equal(&self, other: &Span) -> bool { + self.lo == other.lo && self.hi == other.hi + } + + /// Returns `Some(span)`, a union of `self` and `other`, on overlap. + pub fn merge(self, other: Span) -> Option { + if self.expn_id != other.expn_id { + return None; + } + + if (self.lo <= other.lo && self.hi > other.lo) || + (self.lo >= other.lo && self.lo < other.hi) { + Some(Span { + lo: cmp::min(self.lo, other.lo), + hi: cmp::max(self.hi, other.hi), + expn_id: self.expn_id, + }) + } else { + None + } + } + + /// Returns `Some(span)`, where the start is trimmed by the end of `other` + pub fn trim_start(self, other: Span) -> Option { + if self.hi > other.hi { + Some(Span { lo: cmp::max(self.lo, other.hi), .. self }) + } else { + None + } + } +} + +#[derive(Clone, Debug)] +pub struct SpanLabel { + /// The span we are going to include in the final snippet. + pub span: Span, + + /// Is this a primary span? This is the "locus" of the message, + /// and is indicated with a `^^^^` underline, versus `----`. + pub is_primary: bool, + + /// What label should we attach to this span (if any)? 
+ pub label: Option, +} + +impl Encodable for Span { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_struct("Span", 2, |s| { + s.emit_struct_field("lo", 0, |s| { + self.lo.encode(s) + })?; + + s.emit_struct_field("hi", 1, |s| { + self.hi.encode(s) + }) + }) + } +} + +impl Decodable for Span { + fn decode(d: &mut D) -> Result { + d.read_struct("Span", 2, |d| { + let lo = d.read_struct_field("lo", 0, |d| { + BytePos::decode(d) + })?; + + let hi = d.read_struct_field("hi", 1, |d| { + BytePos::decode(d) + })?; + + Ok(mk_sp(lo, hi)) + }) + } +} + +fn default_span_debug(span: Span, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Span {{ lo: {:?}, hi: {:?}, expn_id: {:?} }}", + span.lo, span.hi, span.expn_id) +} + +impl fmt::Debug for Span { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + SPAN_DEBUG.with(|span_debug| span_debug.get()(*self, f)) + } +} + +pub const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_id: NO_EXPANSION }; + +// Generic span to be used for code originating from the command line +pub const COMMAND_LINE_SP: Span = Span { lo: BytePos(0), + hi: BytePos(0), + expn_id: COMMAND_LINE_EXPN }; + +impl MultiSpan { + pub fn new() -> MultiSpan { + MultiSpan { + primary_spans: vec![], + span_labels: vec![] + } + } + + pub fn from_span(primary_span: Span) -> MultiSpan { + MultiSpan { + primary_spans: vec![primary_span], + span_labels: vec![] + } + } + + pub fn from_spans(vec: Vec) -> MultiSpan { + MultiSpan { + primary_spans: vec, + span_labels: vec![] + } + } + + pub fn push_span_label(&mut self, span: Span, label: String) { + self.span_labels.push((span, label)); + } + + /// Selects the first primary span (if any) + pub fn primary_span(&self) -> Option { + self.primary_spans.first().cloned() + } + + /// Returns all primary spans. + pub fn primary_spans(&self) -> &[Span] { + &self.primary_spans + } + + /// Returns the strings to highlight. 
We always ensure that there + /// is an entry for each of the primary spans -- for each primary + /// span P, if there is at least one label with span P, we return + /// those labels (marked as primary). But otherwise we return + /// `SpanLabel` instances with empty labels. + pub fn span_labels(&self) -> Vec { + let is_primary = |span| self.primary_spans.contains(&span); + let mut span_labels = vec![]; + + for &(span, ref label) in &self.span_labels { + span_labels.push(SpanLabel { + span: span, + is_primary: is_primary(span), + label: Some(label.clone()) + }); + } + + for &span in &self.primary_spans { + if !span_labels.iter().any(|sl| sl.span == span) { + span_labels.push(SpanLabel { + span: span, + is_primary: true, + label: None + }); + } + } + + span_labels + } +} + +impl From for MultiSpan { + fn from(span: Span) -> MultiSpan { + MultiSpan::from_span(span) + } +} + +#[derive(PartialEq, Eq, Clone, Debug, Hash, RustcEncodable, RustcDecodable, Copy)] +pub struct ExpnId(pub u32); + +pub const NO_EXPANSION: ExpnId = ExpnId(!0); +// For code appearing from the command line +pub const COMMAND_LINE_EXPN: ExpnId = ExpnId(!1); + +impl ExpnId { + pub fn from_u32(id: u32) -> ExpnId { + ExpnId(id) + } + + pub fn into_u32(self) -> u32 { + self.0 + } +} + +/// Identifies an offset of a multi-byte character in a FileMap +#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)] +pub struct MultiByteChar { + /// The absolute offset of the character in the CodeMap + pub pos: BytePos, + /// The number of bytes, >=2 + pub bytes: usize, +} + +/// A single source in the CodeMap. +pub struct FileMap { + /// The name of the file that the source came from, source that doesn't + /// originate from files has names between angle brackets by convention, + /// e.g. `` + pub name: FileName, + /// The absolute path of the file that the source came from. 
+ pub abs_path: Option, + /// The complete source code + pub src: Option>, + /// The start position of this source in the CodeMap + pub start_pos: BytePos, + /// The end position of this source in the CodeMap + pub end_pos: BytePos, + /// Locations of lines beginnings in the source code + pub lines: RefCell>, + /// Locations of multi-byte characters in the source code + pub multibyte_chars: RefCell>, +} + +impl Encodable for FileMap { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_struct("FileMap", 6, |s| { + s.emit_struct_field("name", 0, |s| self.name.encode(s))?; + s.emit_struct_field("abs_path", 1, |s| self.abs_path.encode(s))?; + s.emit_struct_field("start_pos", 2, |s| self.start_pos.encode(s))?; + s.emit_struct_field("end_pos", 3, |s| self.end_pos.encode(s))?; + s.emit_struct_field("lines", 4, |s| { + let lines = self.lines.borrow(); + // store the length + s.emit_u32(lines.len() as u32)?; + + if !lines.is_empty() { + // In order to preserve some space, we exploit the fact that + // the lines list is sorted and individual lines are + // probably not that long. Because of that we can store lines + // as a difference list, using as little space as possible + // for the differences. + let max_line_length = if lines.len() == 1 { + 0 + } else { + lines.windows(2) + .map(|w| w[1] - w[0]) + .map(|bp| bp.to_usize()) + .max() + .unwrap() + }; + + let bytes_per_diff: u8 = match max_line_length { + 0 ... 0xFF => 1, + 0x100 ... 0xFFFF => 2, + _ => 4 + }; + + // Encode the number of bytes used per diff. + bytes_per_diff.encode(s)?; + + // Encode the first element. + lines[0].encode(s)?; + + let diff_iter = (&lines[..]).windows(2) + .map(|w| (w[1] - w[0])); + + match bytes_per_diff { + 1 => for diff in diff_iter { (diff.0 as u8).encode(s)? }, + 2 => for diff in diff_iter { (diff.0 as u16).encode(s)? }, + 4 => for diff in diff_iter { diff.0.encode(s)? 
}, + _ => unreachable!() + } + } + + Ok(()) + })?; + s.emit_struct_field("multibyte_chars", 5, |s| { + (*self.multibyte_chars.borrow()).encode(s) + }) + }) + } +} + +impl Decodable for FileMap { + fn decode(d: &mut D) -> Result { + + d.read_struct("FileMap", 6, |d| { + let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?; + let abs_path: Option = + d.read_struct_field("abs_path", 1, |d| Decodable::decode(d))?; + let start_pos: BytePos = d.read_struct_field("start_pos", 2, |d| Decodable::decode(d))?; + let end_pos: BytePos = d.read_struct_field("end_pos", 3, |d| Decodable::decode(d))?; + let lines: Vec = d.read_struct_field("lines", 4, |d| { + let num_lines: u32 = Decodable::decode(d)?; + let mut lines = Vec::with_capacity(num_lines as usize); + + if num_lines > 0 { + // Read the number of bytes used per diff. + let bytes_per_diff: u8 = Decodable::decode(d)?; + + // Read the first element. + let mut line_start: BytePos = Decodable::decode(d)?; + lines.push(line_start); + + for _ in 1..num_lines { + let diff = match bytes_per_diff { + 1 => d.read_u8()? as u32, + 2 => d.read_u16()? as u32, + 4 => d.read_u32()?, + _ => unreachable!() + }; + + line_start = line_start + BytePos(diff); + + lines.push(line_start); + } + } + + Ok(lines) + })?; + let multibyte_chars: Vec = + d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?; + Ok(FileMap { + name: name, + abs_path: abs_path, + start_pos: start_pos, + end_pos: end_pos, + src: None, + lines: RefCell::new(lines), + multibyte_chars: RefCell::new(multibyte_chars) + }) + }) + } +} + +impl fmt::Debug for FileMap { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "FileMap({})", self.name) + } +} + +impl FileMap { + /// EFFECT: register a start-of-line offset in the + /// table of line-beginnings. 
+ /// UNCHECKED INVARIANT: these offsets must be added in the right + /// order and must be in the right places; there is shared knowledge + /// about what ends a line between this file and parse.rs + /// WARNING: pos param here is the offset relative to start of CodeMap, + /// and CodeMap will append a newline when adding a filemap without a newline at the end, + /// so the safe way to call this is with value calculated as + /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap. + pub fn next_line(&self, pos: BytePos) { + // the new charpos must be > the last one (or it's the first one). + let mut lines = self.lines.borrow_mut(); + let line_len = lines.len(); + assert!(line_len == 0 || ((*lines)[line_len - 1] < pos)); + lines.push(pos); + } + + /// get a line from the list of pre-computed line-beginnings. + /// line-number here is 0-based. + pub fn get_line(&self, line_number: usize) -> Option<&str> { + match self.src { + Some(ref src) => { + let lines = self.lines.borrow(); + lines.get(line_number).map(|&line| { + let begin: BytePos = line - self.start_pos; + let begin = begin.to_usize(); + // We can't use `lines.get(line_number+1)` because we might + // be parsing when we call this function and thus the current + // line is the last one we have line info for. 
+ let slice = &src[begin..]; + match slice.find('\n') { + Some(e) => &slice[..e], + None => slice + } + }) + } + None => None + } + } + + pub fn record_multibyte_char(&self, pos: BytePos, bytes: usize) { + assert!(bytes >=2 && bytes <= 4); + let mbc = MultiByteChar { + pos: pos, + bytes: bytes, + }; + self.multibyte_chars.borrow_mut().push(mbc); + } + + pub fn is_real_file(&self) -> bool { + !(self.name.starts_with("<") && + self.name.ends_with(">")) + } + + pub fn is_imported(&self) -> bool { + self.src.is_none() + } + + pub fn count_lines(&self) -> usize { + self.lines.borrow().len() + } +} + +// _____________________________________________________________________________ +// Pos, BytePos, CharPos +// + +pub trait Pos { + fn from_usize(n: usize) -> Self; + fn to_usize(&self) -> usize; +} + +/// A byte offset. Keep this small (currently 32-bits), as AST contains +/// a lot of them. +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +pub struct BytePos(pub u32); + +/// A character offset. Because of multibyte utf8 characters, a byte offset +/// is not equivalent to a character offset. The CodeMap will convert BytePos +/// values to CharPos values as necessary. 
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +pub struct CharPos(pub usize); + +// FIXME: Lots of boilerplate in these impls, but so far my attempts to fix +// have been unsuccessful + +impl Pos for BytePos { + fn from_usize(n: usize) -> BytePos { BytePos(n as u32) } + fn to_usize(&self) -> usize { let BytePos(n) = *self; n as usize } +} + +impl Add for BytePos { + type Output = BytePos; + + fn add(self, rhs: BytePos) -> BytePos { + BytePos((self.to_usize() + rhs.to_usize()) as u32) + } +} + +impl Sub for BytePos { + type Output = BytePos; + + fn sub(self, rhs: BytePos) -> BytePos { + BytePos((self.to_usize() - rhs.to_usize()) as u32) + } +} + +impl Encodable for BytePos { + fn encode(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_u32(self.0) + } +} + +impl Decodable for BytePos { + fn decode(d: &mut D) -> Result { + Ok(BytePos(d.read_u32()?)) + } +} + +impl Pos for CharPos { + fn from_usize(n: usize) -> CharPos { CharPos(n) } + fn to_usize(&self) -> usize { let CharPos(n) = *self; n } +} + +impl Add for CharPos { + type Output = CharPos; + + fn add(self, rhs: CharPos) -> CharPos { + CharPos(self.to_usize() + rhs.to_usize()) + } +} + +impl Sub for CharPos { + type Output = CharPos; + + fn sub(self, rhs: CharPos) -> CharPos { + CharPos(self.to_usize() - rhs.to_usize()) + } +} + +// _____________________________________________________________________________ +// Loc, LocWithOpt, FileMapAndLine, FileMapAndBytePos +// + +/// A source code location used for error reporting +#[derive(Debug)] +pub struct Loc { + /// Information about the original source + pub file: Rc, + /// The (1-based) line number + pub line: usize, + /// The (0-based) column offset + pub col: CharPos +} + +/// A source code location used as the result of lookup_char_pos_adj +// Actually, *none* of the clients use the filename *or* file field; +// perhaps they should just be removed. 
+#[derive(Debug)] +pub struct LocWithOpt { + pub filename: FileName, + pub line: usize, + pub col: CharPos, + pub file: Option>, +} + +// used to be structural records. Better names, anyone? +#[derive(Debug)] +pub struct FileMapAndLine { pub fm: Rc, pub line: usize } +#[derive(Debug)] +pub struct FileMapAndBytePos { pub fm: Rc, pub pos: BytePos } + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct LineInfo { + /// Index of line, starting from 0. + pub line_index: usize, + + /// Column in line where span begins, starting from 0. + pub start_col: CharPos, + + /// Column in line where span ends, starting from 0, exclusive. + pub end_col: CharPos, +} + +pub struct FileLines { + pub file: Rc, + pub lines: Vec +} + +thread_local!(pub static SPAN_DEBUG: Cell fmt::Result> = + Cell::new(default_span_debug)); + +/* assuming that we're not in macro expansion */ +pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span { + Span {lo: lo, hi: hi, expn_id: NO_EXPANSION} +} + +pub struct MacroBacktrace { + /// span where macro was applied to generate this code + pub call_site: Span, + + /// name of macro that was applied (e.g., "foo!" 
or "#[derive(Eq)]") + pub macro_decl_name: String, + + /// span where macro was defined (if known) + pub def_site_span: Option, +} + +// _____________________________________________________________________________ +// SpanLinesError, SpanSnippetError, DistinctSources, MalformedCodemapPositions +// + +pub type FileLinesResult = Result; + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SpanLinesError { + IllFormedSpan(Span), + DistinctSources(DistinctSources), +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SpanSnippetError { + IllFormedSpan(Span), + DistinctSources(DistinctSources), + MalformedForCodemap(MalformedCodemapPositions), + SourceNotAvailable { filename: String } +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct DistinctSources { + pub begin: (String, BytePos), + pub end: (String, BytePos) +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct MalformedCodemapPositions { + pub name: String, + pub source_len: usize, + pub begin_pos: BytePos, + pub end_pos: BytePos +} +