//
//	Documentizer by Owen Williams
//
//	Processes a simple HTML document and converts all headers into a Table of Contents,
//	and all (case sensitive) occurrences of header text into links within the document.
//

// dead-stupid browser detection
// (isGecko / isWebkit are sniffed from the user agent string; isIE relies on the
// presence of the legacy document.all collection)
var isGecko = /Gecko\//.test(navigator.userAgent);
var isWebkit = /WebKit/.test(navigator.userAgent);
var isIE = (document.all != null);

// the documentizer doesn't work well in IE -- show them a notice if necessary
// (getElementById returns null when the page has no "PlatformNotice" element, or
// it has not been parsed yet; guard so that case does not abort the whole script)
if (isIE) {
	(function () {
		var notice = document.getElementById("PlatformNotice");
		if (notice) notice.style.display = "block";
	})();
}


// Resolve an element reference: a string is treated as an element id and looked
// up in the document; anything else is handed back untouched.
function $(id) {
	if (typeof id != "string") {
		return id;
	}
	return document.getElementById(id);
}


// id of the main "contents" element (where all of the text lives);
// buildTableOfContents reads and rewrites the innerHTML of this element
var pageContentsId = "Contents";

// id of the element in which to display the table of contents outline
var TOCid = "TOC";



// Regular expressions for matching part of the text
//
// Only real heading tags (<h1> .. <h6>) are matched, so tags such as <hr> are not
// mistaken for headers, and the open/close tag patterns are case-insensitive to
// agree with headerHTMLRE (upper-case markup used to produce a null match).

// a complete header on a single line: open tag, contents, close tag
var headerHTMLRE = /<h[1-6][\s>].*?<\/h[1-6]>/gi;
// [1] = everything between the first ">" and the last "<" of a header
var headerTitleRE = />(.*)</;
// [0] = the complete open tag, [1] = the heading level digit
var headerOpenTagRE = /<h([1-6]).*?>/i;
// [0] = the close tag
var headerCloseTagRE = /<\/h[1-6]>/i;
// [1] = the comma separated list of alternate names in an alt='...' attribute
var headerAlternateRE = /alt=['"](.*?)['"]/;


//	buildTableOfContents -- top-level routine which processes HTML and produces a TOC
//
//	Compile all <h#> headers in the main document text into an interactive Table of Contents.
//	Also converts any (case sensitive) occurrence of a header in the rest of the text to a link.
//
//	Does nothing if the contents element cannot be found.  If only the TOC element
//	is missing, the outline is not displayed but the document text is still processed.
//
function buildTableOfContents() {
	// get the HTML of the page
	var contentsElement = $(pageContentsId);
	if (!contentsElement) return;
	var html = contentsElement.innerHTML;

	// pull out the TOC elements
	var toc = extractTOC(html);

	// build the actual table of contents
	// (always call outputTOC, and call it before processTOCInHTML: besides producing
	// the outline it assigns the levelStr numbering that the header anchors use)
	var tocHTML = outputTOC(toc);
	var tocElement = $(TOCid);
	if (tocElement) tocElement.innerHTML = tocHTML;

	// process any instances of header text into a link to that header
	contentsElement.innerHTML = processTOCInHTML(html, toc);

	// for convenience, convert all <pre class='file'> elements in a special pre-formatted manner
	prettifyFilePREs(contentsElement);
}

// extract all header tags in the document into a TOC data structure
//
// html:	HTML source to scan
// returns:	array (possibly empty) with one entry per usable header, in document order:
//		level		heading level as a string ("1" .. "6")
//		title		text between the open and close tags of the header
//		ref		anchor name: the lower-cased title with spaces removed
//		matches		the title plus any alternate names (alt='a,b' on the header), longest first
//		openTag, closeTag, html		the matched markup
//		alternates	raw regex match of the alt attribute, or null if there is none
//
// Headers with an empty title, markup the helper patterns cannot parse, or a level
// other than 1-6 are left out of the TOC.
function extractTOC(html) {
	var TOC = [];

	// match() returns null (not an empty array) when the document has no headers
	var headerMatches = html.match(headerHTMLRE) || [];

	for (var i = 0; i < headerMatches.length; i++) {
		var headerHTML = headerMatches[i];

		var titleMatch = headerHTML.match(headerTitleRE);
		var openTagMatch = headerHTML.match(headerOpenTagRE);
		var closeTagMatch = headerHTML.match(headerCloseTagRE);
		if (!titleMatch || !openTagMatch || !closeTagMatch) continue;

		// TODO: suppress things with empty title rather than just ignoring for TOC
		// TODO: suppress if there is no content (watch out for nested things!)
		var title = titleMatch[1];
		if (!title) continue;

		// anything that is not a real heading level (eg: "<hr>...</h1>") is not a header
		var headerLevel = openTagMatch[1];
		if (!/^[1-6]$/.test(headerLevel)) continue;

		var openTag = openTagMatch[0];
		var closeTag = closeTagMatch[0];

		// remove spaces in the actual ref name
		var ref = title.toLowerCase().split(" ").join("");

		var matches = [title];

		// get any alternate names for this heading
		var alternates = openTag.match(headerAlternateRE);
		if (alternates) {
			var names = alternates[1].split(",");
			for (var n = 0; n < names.length; n++) {
				// ignore whitespace around the commas, and skip empty names
				var name = names[n].replace(/^\s+|\s+$/g, "");
				if (name) matches.push(name);
			}
			// longest first, so a short name never gets replaced inside a longer one
			matches.sort(function (a, b) { return b.length - a.length; });
		}

		TOC.push({
					level:headerLevel,
					matches:matches,
					title:title,
					ref:ref,
					openTag:openTag,
					closeTag:closeTag,
					html:headerHTML,
					alternates:alternates
				});
	}
	return TOC;
}

// output the TOC structure we've built into an outline
//
// toc:		array of header entries; level, title and ref of each entry are read
// returns:	HTML string: a "Table of Contents" heading followed by nested <ol> lists
//
// Side effect: sets levelStr (the dotted section number, eg: "2.1") on every entry.
//
// A stack of the currently open lists is kept so that every <ol> is closed exactly
// once, even when the document skips heading levels or does not start at <h1>.
function outputTOC(toc) {
	var levels = [0,0,0,0,0,0,0];	// running counter per heading level (index 0 unused)
	var openLists = [];				// heading levels of the <ol>s that are currently open
	var output = [];

	output.push("<h1 class='top TOC'>Table of Contents</h1>");
	output.push("<div class='tocbody'>");

	// dotted section number for a header at the given level
	function outputLevel(level) {
		var it = [];
		for (var i = 1; i <= level; i++) {
			it.push(levels[i]);
		}
		return it.join(".");
	}

	// restart the numbering of the given level and everything below it
	function resetLevels(level) {
		for (;level <= 6; level++) {
			levels[level] = 0;
		}
	}

	for (var i = 0; i < toc.length; i++) {
		var header = toc[i];
		// header.level is a string when it comes from extractTOC; compare numerically
		var level = Number(header.level);

		// close every list that is deeper than this header
		while (openLists.length && openLists[openLists.length - 1] > level) {
			openLists.pop();
			output.push("\n<\/ol>");
		}
		// open a new list if we are now deeper than the innermost open one
		if (!openLists.length || openLists[openLists.length - 1] < level) {
			output.push("\n<ol class='tocSection tocSection", header.level, "'>");
			openLists.push(level);
			resetLevels(level);
		}

		levels[level]++;
		header.levelStr = outputLevel(level);

		output.push("\n<li class='tocItem tocItem", header.level, "'>", getHeaderAnchor(header.title, header)," <\/li>");
	}

	// close whatever is still open
	while (openLists.length) {
		openLists.pop();
		output.push("\n<\/ol>");
	}
	output.push("</div>");
	return output.join("");
}


// process occurrences of TOC text (from headers) in the document to links
//
// html:	HTML source of the document
// toc:		header entries; title, matches, ref, level and levelStr of each are read
// returns:	the HTML with each header wrapped in a named anchor and every
//			(case sensitive) occurrence of header text turned into a link to it
//
// four pass algorithm:
//	1: replace all toc header elements with $_#_$
//	2: replace all (sorted) occurrences of header text with *_#_*
//	3: replace all $_#_$ with the header as a name= link
//	4: replace all *_#_* with the link text
//
// The markers keep the text inserted for one header from being matched again
// while the other headers are processed.
function processTOCInHTML(html, toc) {
	var i, m, item, match, marker;

	// header titles are literal text, not patterns: without escaping, a title such
	// as "What? (FAQ)" would fail to match itself or throw a SyntaxError
	function escapeRegExp(text) {
		return String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	}

	// step 1:  replace all toc header elements with a marker
	for (i = 0; i < toc.length; i++) {
		item = toc[i];
		var re = new RegExp("<h(.*?)>" + escapeRegExp(item.title) + "</h.>");
		marker = "$_" + (i + 1) + "_$";
		// a function replacement inserts the marker verbatim ("$" is special in strings)
		html = html.replace(re, function () { return marker; });
	}

	// get a sorted copy of the toc
	var sorted_toc = [].concat(toc).sort(sortLongestFirst);

	// 2: replace occurrences of the header text with a different marker
	for (i = 0; i < sorted_toc.length; i++) {
		item = sorted_toc[i];
		for (m = 0; m < item.matches.length; m++) {
			match = item.matches[m];
			// an empty string would split the document into single characters
			if (!match) continue;
			html = html.split(match).join("*_" + (i + 1) + "." + (m + 1) + "_*");

// todo: get a regex to work instead of the above, so we don't match inside of a word
//			var re = new RegExp("\\b"+match+"\\b");
//			html = html.replace(re, "*_"+i+"."+m+"_*");
		}
	}

	// 3: replace header markers with header+name anchor
	//	(each header closes the <div> of the previous section and opens its own)
	for (i = 0; i < toc.length; i++) {
		item = toc[i];
		html = html.split("$_" + (i + 1) + "_$").join(
				"<\/div>"
					+"<a name='"+item.ref+"'>"
						+"<h"+item.level+">"
							+item.levelStr + " " + item.title
						+"</h"+item.level+">"
					+"</a>"
				+ "<div class='section"+ item.level+ "'>"
			);
	}

	// 4: replace link text with <a href=''> anchor
	for (i = 0; i < sorted_toc.length; i++) {
		item = sorted_toc[i];
		for (m = 0; m < item.matches.length; m++) {
			match = item.matches[m];
			if (!match) continue;
			html = html.split("*_" + (i + 1) + "." + (m + 1) + "_*").join(getHeaderAnchor(match, item));
		}
	}

	return html;
}




// convert certain types of <pre> tags to make them pretty
//
// For every <pre> inside element:
//	- the whitespace in front of the first line is also stripped from the start of
//	  the following lines (so the source can be indented without the output being)
//	- tabs become 4 spaces and trailing whitespace is dropped
//	- class "xmlFile": tags are wrapped in <span class='tag'>
//	- class "propertiesFile": #IFDEF-style directives, # comments and @skin@
//	  variables are wrapped in spans
// The result is written back to the innerHTML of the <pre>.
function prettifyFilePREs(element) {
	var preList = element.getElementsByTagName("pre");
	var index = 0;

	while (index < preList.length) {
		var node = preList[index];
		index += 1;

		var text = node.innerHTML;

		// leading whitespace of the first line
		var leading = text.match(/^[\n*\t ]*/);
		var indent = leading ? leading[0] : "";
		if (indent) {
			var unindented = text.split("\n" + indent).join("\n");
			text = unindented.substring(indent.length);
		}

		// tabs are shown as 4 spaces
		text = text.split("\t").join("    ");

		// no whitespace at the end
		text = text.replace(/\s*$/,"");

		var classes = node.className;
		var isXml = classes.indexOf("xmlFile") > -1;
		var isProperties = !isXml && classes.indexOf("propertiesFile") > -1;

		if (isXml) {
			text = text.replace(/(?:\[|<|\&lt;)(.*?)(?:>|\&gt;)/gm, "<span class='tag'>$1<\/span>");
		}
		if (isProperties) {
			text = text.replace(/^(\s)*#(IFDEF|ENDIF|ELSE|IFNDEF)(.*)/gm, "<span class='directive'>$1 #$2 $3<\/span>");
			text = text.replace(/^#(?!IFDEF|ENDIF|ELSE|IFNDEF)(.*)/gm, "<span class='comment'>#$1<\/span>");
			text = text.replace(/@(.*?)@/gm, "<span class='skinVar'>@$1@<\/span>");
		}

		node.innerHTML = text;
	}
}



// Build the HTML for a link that jumps to a header.
//	text:	what to show inside the link
//	header:	TOC entry; its ref is used as the link target
function getHeaderAnchor(text, header) {
	var pieces = [];
	pieces.push("<a href='#");
	pieces.push(header.ref);
	pieces.push("'>");
	pieces.push(text);
	pieces.push("<\/a>");
	// join (rather than +) so that a missing text or ref comes out as an empty string
	return pieces.join("");
}


// Array.sort comparator: longest text first.
//
// Accepts both TOC entries (compared by their title) and plain strings (such as
// alternate header names).  Longer text sorts first so that a header whose text
// contains another header's text is replaced before the shorter one.  Entries of
// equal length fall back to descending alphabetical order.
function sortLongestFirst(a,b) {
	function textOf(entry) {
		if (typeof entry == "string") return entry;
		return (entry && entry.title != null) ? String(entry.title) : "";
	}

	var textA = textOf(a);
	var textB = textOf(b);

	if (textA.length != textB.length) return textB.length - textA.length;
	if (textA < textB) return 1;
	if (textB < textA) return -1;
	return 0;
}


// build the table of contents as soon as the page loads (more or less)
// (a function is passed instead of a string of code so nothing has to be eval'd;
// buildTableOfContents is still looked up only when the timer fires)
setTimeout(function () { buildTableOfContents(); }, 0);
