Automatically Hash Tagging Text With ASP.NET Web Forms (VB.NET)

Demonstrates how to automatically tag an input string with terms contained in a database.

Enter some text in the box, then click the submit button. Results will be shown here.


Terms in the database

adsense amazon android aol api apple array banana bing canvas
cbs chrome cloud comcast darpa eff facebook fight firefox frank
frog google hacker hackers hacking html html5 http https ie9
ietf intel internet ios ipad ipv6 javascript kinect malware microsoft
mozilla mvc nokia pentagon php ps3 rackspace safari silverlight sony
stuxnet symbian tablets twitter vb verizon virus windows xml youtube

Form

		<h2>Automatically Hash Tagging Text With ASP.NET Web Forms (VB.NET)</h2>
		<p><asp:Label runat="server" ID="lblResult" Text='Enter some text in the box, then click the submit button. Results will be shown here.' /></p>
		<asp:TextBox runat="server" ID="tbInput" TextMode="MultiLine" Rows="10" Columns="50" Text="Amazon uses HTML5 and JavaScript; Google owns YouTube."  />
		<asp:RequiredFieldValidator runat="server" ID="rfvInput" ControlToValidate="tbInput" ErrorMessage='<br />Please provide some text.' CssClass="warning" Display="Dynamic" />
		<br />
		<asp:Button runat="server" ID="btnSubmit" Text="Submit" />

		<h4>Terms in the database</h4>
		<asp:DataList runat="server" ID="dlTerms" DataSourceID="sqlTerms" RepeatColumns="10" RepeatDirection="Horizontal" CellPadding="5" CellSpacing="0" ItemStyle-BorderColor="Black" ItemStyle-BorderWidth="1">
			<ItemTemplate>
				<%#Eval("term_text")%>
			</ItemTemplate>
		</asp:DataList>

		<asp:SqlDataSource runat="server" ID="sqlTerms" SelectCommand="your stored procedure" SelectCommandType="StoredProcedure" ConnectionString="<%$ ConnectionStrings:your connection string %>" />
	

Code Behind

		Sub btnSubmit_click(ByVal Sender As Object, ByVal E As EventArgs) Handles btnSubmit.Click
			'get terms from database
			Dim arrTerms As New ArrayList()
			arrTerms = GetAllTerms()

			'get unique words from input text
			Dim arrWords As New ArrayList()
			arrWords = ExtractTerms(tbInput.Text)

			If arrTerms.Count < 1 Then
				lblResult.Text = "There are no terms in the database, or there was an error retrieving the terms."
				lblResult.CssClass = "warning"
			ElseIf arrWords.Count < 1 Then
				lblResult.Text = "There are no words in the string to be tagged."
				lblResult.CssClass = "warning"
			Else
				'get matches between terms and input words
				Dim arrHashes As New ArrayList()
				arrHashes = CompareLists(arrTerms, arrWords)

				If arrHashes.Count < 1 Then
					lblResult.Text = "There were no matches between the input text and the terms in the database."
					lblResult.CssClass = ""
				Else
					'display found terms
					Dim sbMsg As New StringBuilder("The following terms were found: ")
					For Each strTerm As String In arrHashes
						sbMsg.Append(strTerm)
						sbMsg.Append(", ")
					Next
					sbMsg.Remove(sbMsg.Length - 2, 2)
					lblResult.Text = sbMsg.ToString()
					lblResult.CssClass = ""   
					
					'autotag input string
					tbInput.Text = AutoTagSubject(tbInput.Text, arrHashes)
				End If
			End If

		End Sub

		Function GetAllTerms() As ArrayList
			'retrieves all terms from the database
			'returns empty ArrayList on error,
			'populated ArrayList on success

			Dim arrOut As New ArrayList()

			Dim objConn As New SqlConnection(ConfigurationManager.ConnectionStrings("your connection string").ConnectionString)
			Dim objCmd As New SqlCommand("your stored procedure", objConn)
			objCmd.CommandType = CommandType.StoredProcedure

			Dim objReader As SqlDataReader
			objConn.Open()
			objReader = objCmd.ExecuteReader()
			While objReader.Read()
				arrOut.Add(objReader(0))
			End While
			objConn.Close()
			objCmd.Dispose()
			objConn.Dispose()

			Return arrOut
		End Function

		Function ExtractTerms(ByVal strInput As String) As ArrayList
			'extracts all words from textbox
			'returns them as ArrayList, empty ArrayList on error
			'words are any alphanumeric sequence proceeding a space or newline

			Dim arrOut As New ArrayList()

			Dim reWords As New Regex("\w+(\s|$)", RegexOptions.IgnoreCase Or RegexOptions.CultureInvariant)
			Dim reMatches As MatchCollection = reWords.Matches(tbInput.Text)

			For Each reMatch As Match In reMatches
				arrOut.Add(reMatch.Value.Trim)
			Next

			Return arrOut
		End Function

		Function CompareLists(ByVal arrTerms As ArrayList, ByVal arrWords As ArrayList) As ArrayList
			'compares term list against word list
			'returns ArrayList with all words found in terms
			'maintains case

			Dim arrOut As New ArrayList()

			For Each strWord As String In arrWords
				For Each strTerm As String In arrTerms
					If strTerm.ToLower = strWord.ToLower Then
						arrOut.Add(strWord)
						Exit For
					End If
				Next
			Next

			Return arrOut
		End Function

		Function AutoTagSubject(ByVal strInput As String, ByVal arrTerms As ArrayList) As String
			'applies arrTerms as hashtags to strInput
			'removes hashtags first to avoid double-tagging
			
			Dim strOut As String = strInput
			strOut = strOut.Replace("#", "")

			For Each strTerm As String In arrTerms
				strOut = strOut.Replace(strTerm, "#" & strTerm)
			Next

			Return strOut
		End Function