Skip to content

Commit

Permalink
Removed remaining regular expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
Knagis committed Sep 13, 2014
1 parent da87995 commit 1f298be
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 23 deletions.
92 changes: 69 additions & 23 deletions CommonMark/Parser/Scanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,12 @@ namespace CommonMark.Parser
/// </summary>
internal static partial class Scanner
{
// Options handed to the Regex instances constructed below; currently RegexOptions.None.
private const RegexOptions useCompilation = RegexOptions.None;

/// <summary>
/// List of valid URL schemes. The array must be sorted.
/// </summary>
private static readonly string[] schemeArray = new[] { "aaa", "aaas", "about", "acap", "adiumxtra", "afp", "afs", "aim", "apt", "attachment", "aw", "beshare", "bitcoin", "bolo", "callto", "cap", "chrome", "chrome-extension", "cid", "coap", "com-eventbrite-attendee", "content", "crid", "cvs", "data", "dav", "dict", "dlna-playcontainer", "dlna-playsingle", "dns", "doi", "dtn", "dvb", "ed2k", "facetime", "feed", "file", "finger", "fish", "ftp", "geo", "gg", "git", "gizmoproject", "go", "gopher", "gtalk", "h323", "hcp", "http", "https", "iax", "icap", "icon", "im", "imap", "info", "ipn", "ipp", "irc", "irc6", "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs", "itms", "jar", "javascript", "jms", "keyparc", "lastfm", "ldap", "ldaps", "magnet", "mailto", "maps", "market", "message", "mid", "mms", "ms-help", "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs", "ni", "nih", "nntp", "notes", "oid", "opaquelocktoken", "palm", "paparazzi", "platform", "pop", "pres", "proxy", "psyc", "query", "res", "resource", "rmi", "rsync", "rtmp", "rtsp", "secondlife", "service", "session", "sftp", "sgn", "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam", "svn", "tag", "teamspeak", "tel", "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tv", "udp", "unreal", "urn", "ut2004", "vemmi", "ventrilo", "view-source", "webcal", "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50r", "z39.50s" };
// Lower-case HTML tag names, listed alphabetically; presumably the tags treated as block-level
// by the scanner - confirm against the code that reads this array.
// NOTE(review): "footer" appears twice in this list.
private static readonly string[] blockTagNames = new[] { "article", "aside", "blockquote", "body", "button", "canvas", "caption", "col", "colgroup", "dd", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "iframe", "li", "map", "object", "ol", "output", "p", "pre", "progress", "script", "section", "style", "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "tr", "ul", "video" };


// Anchored pattern for the text of an e-mail autolink: a non-empty local part, '@', then one or
// more dot-separated domain labels (each 1 to 63 characters, starting and ending with an ASCII
// letter or digit, hyphens allowed in between), terminated by '>'.
private static readonly Regex autolink_email = new Regex("^[a-zA-Z0-9.!#$%&'\\*+/=?^_`{|}~-]+[@][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*[>]", useCompilation);
// Anchored pattern for a closing code fence: a run of three or more backticks, or three or more
// tildes, followed only by optional whitespace up to the end of the searched text.
private static readonly Regex close_code_fence = new Regex(@"^([`]{3,}|[~]{3,})(?:\s*)$", useCompilation);

/// <summary>
/// Tries the given expressions, in order, against the part of <paramref name="s"/> that starts at
/// <paramref name="pos"/> and runs to the end of the string.
/// </summary>
/// <param name="s">The text to search.</param>
/// <param name="pos">The index at which a match has to begin.</param>
/// <param name="regexes">The expressions to try, in priority order.</param>
/// <returns>
/// The length of the first successful match that starts exactly at <paramref name="pos"/>,
/// or 0 when none of the expressions produces such a match.
/// </returns>
private static int MatchRegex(string s, int pos, params Regex[] regexes)
{
    for (var index = 0; index < regexes.Length; index++)
    {
        // Only the tail of the string beginning at pos is searched.
        var candidate = regexes[index].Match(s, pos, s.Length - pos);

        // A hit that starts later than pos does not count; move on to the next expression.
        if (!candidate.Success || candidate.Index != pos)
            continue;

        return candidate.Length;
    }

    return 0;
}

/// <summary>
/// Try to match URI autolink after first &lt;, returning number of chars matched.
/// </summary>
Expand Down Expand Up @@ -89,7 +70,53 @@ public static int scan_autolink_email(string s, int pos)
[>] { return (p - start); }
.? { return 0; }
*/
return MatchRegex(s, pos, autolink_email);

if (pos + 6 >= s.Length)
return 0;

char c = s[pos];
if (c == '@')
return 0;

int i = pos;
int ln = s.Length - 1;
while (i <= ln)
{
if (c == '@')
break;

if ((c < 'a' || c > 'z')
&& (c < 'A' || c > 'Z')
&& (c < '0' || c > '9')
&& ".!#$%&'*+/=?^_`{|}~-".IndexOf(c) == -1)
return 0;
if (i == ln) return 0;
c = s[++i];
}

// move past '@'
if (i == ln) return 0;
c = s[++i];
bool hadDot = false;

while (true)
{
var domainStart = i;
if (!ScannerCharacterMatcher.MatchAsciiLetterOrDigit(s, ref c, ref i, ln, '-'))
return 0;

if (s[i - 1] == '-' || i - domainStart > 63)
return 0;

if (c == '>')
return hadDot ? i - pos + 1 : 0;

if (c != '.' || i == ln)
return 0;

hadDot = true;
c = s[++i];
}
}

/// <summary>
Expand Down Expand Up @@ -458,9 +485,28 @@ public static int scan_close_code_fence(string s, int pos, int len)
} }
.? { return 0; }
*/
var p = MatchRegex(s, pos, close_code_fence);
if (p > len)
return p;
if (pos + len >= s.Length)
return 0;

var c1 = s[pos];
if (c1 != '`' && c1 != '~')
return 0;

char c;
var cnt = 1;
var spaces = false;
for (var i = pos + 1; i < s.Length; i++)
{
c = s[i];
if (c == c1 && !spaces)
cnt++;
else if (c == ' ')
spaces = true;
else if (c == '\n')
return cnt < len ? 0 : cnt;
else
return 0;
}

return 0;
}
Expand Down
21 changes: 21 additions & 0 deletions CommonMark/Parser/ScannerCharacterMatcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,27 @@ internal static bool MatchAsciiLetterOrDigit(string data, ref char currentCharac
return matched;
}

/// <summary>
/// Advances through the given string while the current character is an ASCII letter, an ASCII digit
/// or the additional character <paramref name="valid1"/>, and the current position is still before
/// <paramref name="lastPosition"/>.
/// </summary>
/// <param name="data">The string being scanned.</param>
/// <param name="currentCharacter">The character at the current position; updated on every step.</param>
/// <param name="currentPosition">The current index into <paramref name="data"/>; updated on every step.</param>
/// <param name="lastPosition">The index at which scanning stops regardless of the character found there.</param>
/// <param name="valid1">An additional character that is accepted besides ASCII letters and digits.</param>
/// <returns><c>true</c> if the position was advanced at least once; otherwise <c>false</c>.</returns>
#if OptimizeFor45
[System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
#endif
internal static bool MatchAsciiLetterOrDigit(string data, ref char currentCharacter, ref int currentPosition, int lastPosition, char valid1)
{
    var startPosition = currentPosition;

    while (currentPosition < lastPosition)
    {
        var isLowerCase = currentCharacter >= 'a' && currentCharacter <= 'z';
        var isUpperCase = currentCharacter >= 'A' && currentCharacter <= 'Z';
        var isDigit = currentCharacter >= '0' && currentCharacter <= '9';

        if (!(isLowerCase || isUpperCase || isDigit || currentCharacter == valid1))
            break;

        currentCharacter = data[++currentPosition];
    }

    // Something was matched exactly when the position moved forward.
    return currentPosition != startPosition;
}

/// <summary>
/// Moves along the given string as long as the current character is a ASCII letter or one of the given additional characters.
/// </summary>
Expand Down

0 comments on commit 1f298be

Please sign in to comment.