首页 ┆ 网站地图 ┆ 在线留言 ┆ 游戏资讯 ┆ 资源下载 ┆ 端午节祝福 ┆ 迅雷在线影视
设为首页
加入收藏
联系我们
高级搜索
您当前的位置: 主页>ASP专区>ASP教学>Asp开发的可以用来做采集与分析html代码的类
Asp开发的可以用来做采集与分析html代码的类
来源: 发布时间:2008-06-25 发布人: 浏览: 人次   字体: [ ]  

     '================================================
     '函数名:ReplaceTrim
     '作 用:过滤掉字符中所有的tab和回车和换行
     '================================================
     Public Function ReplaceTrim(ByVal strContent)
           On Error Resume Next
           Dim re
           Set re = New RegExp
           re.IgnoreCase = True
           re.Global = True
           re.Pattern = "(" & Chr(8) & "|" & Chr(9) & "|" & Chr(10) & "|" & Chr(13) & ")"
           strContent = re.Replace(strContent, vbNullString)
           Set re = Nothing
           ReplaceTrim = strContent
           Exit Function
     End Function
     '================================================
     '函数名:ReplaceTrim
     '作 用:过滤掉字符中所有的tab和回车和换行
     '================================================
     Public Function ReplacedTrim(ByVal strContent)
           On Error Resume Next
           Dim re
           Set re = New RegExp
           re.IgnoreCase = True
           re.Global = True
           re.Pattern = "(" & Chr(8) & "|" & Chr(9) & "|" & Chr(10) & "|" & Chr(13) & ")"
           strContent = re.Replace(strContent, vbNullString)
           re.Pattern = "(<!--(.+?)-->)"
           strContent = re.Replace(strContent, vbNullString)
           Set re = Nothing
           ReplacedTrim = strContent
           Exit Function
     End Function
     Public Function CheckMatch(ByVal strContent, ByVal start, ByVal last)
           If Len(strContent) = 0 Then Exit Function
           If Len(start) = 0 Then
                 CheckMatch = strContent
                 Exit Function
           End If
           If Len(last) = 0 Then
                 CheckMatch = strContent
                 Exit Function
           End If
          
           Dim strPattern
                
           On Error Resume Next
          
           strPattern = "(" & CorrectPattern(start) & ")(.+?)(" & CorrectPattern(last) & ")"
          
           Dim re
           Set re = New RegExp
           re.IgnoreCase = True
           re.Global = True
           re.Pattern = "(" & vbNewLine & ")"
           strContent = re.Replace(strContent, vbNullString)
           re.Pattern = strPattern
           strContent = re.Replace(strContent, vbNullString)
           Set re = Nothing
           CheckMatch = strContent
           Exit Function
     End Function
     Private Function CorrectPattern(ByVal str)
           str = Replace(str, "\", "\\")
           str = Replace(str, "~", "\~")
           str = Replace(str, "!", "\!")
           str = Replace(str, "@", "\@")
           str = Replace(str, "#", "\#")
           str = Replace(str, "%", "\%")
           str = Replace(str, "^", "\^")
           str = Replace(str, "&", "\&")
           str = Replace(str, "*", "\*")
           str = Replace(str, "(", "\(")
           str = Replace(str, ")", "\)")
           str = Replace(str, "-", "\-")
           str = Replace(str, "+", "\+")
           str = Replace(str, "[", "\[")
           str = Replace(str, "]", "\]")
           str = Replace(str, "<", "\<")
           str = Replace(str, ">", "\>")
           str = Replace(str, ".", "\.")
           str = Replace(str, "/", "\/")
           str = Replace(str, "?", "\?")
           str = Replace(str, "=", "\=")
           str = Replace(str, "|", "\|")
           str = Replace(str, "$", "\$")
           CorrectPattern = str
     End Function
     '================================================
     '函数名:ClearHtml
     '作 用:过滤掉字符中所有的HTML代码
     '参 数:Str   ----原字符串
     '返回值:过滤取后的字符串
     '================================================
     Public Function CheckHTML(ByVal str)
           On Error Resume Next
          
           Dim re
           Set re = New RegExp
           re.IgnoreCase = True
           re.Global = True
           re.Pattern = "<(.[^>]*)>"
           str = re.Replace(str, "")
           Set re = Nothing
           CheckHTML = str
           Exit Function

     End Function
     '================================================
     '函数名:ClearHtml
     '作 用:过滤掉字符中所有的HTML代码
     '参 数:Str   ----原字符串
     '返回值:过滤取后的字符串
     '================================================
     Public Function CheckHTMLToChar(ByVal str,ByVal strr)
           On Error Resume Next
          
           Dim re
           Set re = New RegExp
           re.IgnoreCase = True
           re.Global = True
           re.Pattern = "<(.[^>]*)>"
           str = re.Replace(str,strr)
           Set re = Nothing
           CheckHTMLToChar = str
           Exit Function

     End Function
     '================================================
     '函数名:CheckStopKey
     '作 用:判断是否有关键字而网站屏蔽该站
     '================================================
     Public Function CheckStopKey(ByVal HtmlCode)
           KeyWord=split("暂时不能访问|当前访问的页面因为含有不允许的关键字","|")
           For each key in KeyWord
                 If instr(HtmlCode,key)>0 then
                       CheckStopKey=true
                       Exit Function
                 Else
                       CheckStopKey=false
                 End if
           Next                 
     End Function
     '================================================
     '函数名:Formatime
     '作 用:格式化时间
     '================================================
     Public Function Formatime(ByVal datime)
           datime = Trim(Replace(Replace(Replace(Trim(datime), " ", ""), Chr(255), ""), Chr(127), ""))
           datime = Trim(Replace(Replace(Replace(Replace(datime, vbNewLine, ""), Chr(9), ""), Chr(39), ""), Chr(34), ""))
           If Not IsDate(datime) Then
                 Formatime = Now
                 Exit Function
           End If
           If Len(datime) < 11 Then
                 Formatime = CDate(datime & " " & FormatDateTime(Now, 3))
           Else
                 Formatime = CDate(datime)
           End If
     End Function
     '================================================
     '函数名:GetRemoteUrl
     '作 用:格式化成完整的URL
     '================================================
     Public Function FormatRemoteUrl(ByVal CurrentUrl, ByVal URL)
           Dim strUrl
           '--修改当是默认主页,并且只有一个目录的时候
           if InStr(URL, "/")=0 and InStr(URL, ".")=0 and InStr(URL, "(")=0 then
                 FormatRemoteUrl=Left(CurrentUrl, InStrRev(CurrentUrl, "/"))&URL&"/"
                 Exit Function
           end if           
           If Len(URL) < 2 Or Len(URL) > 255 Or Len(CurrentUrl) < 2 Then
                 FormatRemoteUrl = vbNullString
                 Exit Function
           End If

           CurrentUrl = Trim(Replace(Replace(Replace(Replace(Replace(CurrentUrl, "'", vbNullString), """", vbNullString), vbNewLine, vbNullString), "\", "/"), "|", vbNullString))
           URL = Trim(Replace(Replace(Replace(Replace(Replace(URL, "'", vbNullString), """", vbNullString), vbNewLine, vbNullString), "\", "/"), "|", vbNullString))
          
           If InStr(9, CurrentUrl, "/") = 0 Then
                 strUrl = CurrentUrl
           Else
                 strUrl = Left(CurrentUrl, InStr(9, CurrentUrl, "/") - 1)
           End If

           If strUrl = vbNullString Then strUrl = CurrentUrl
           Select Case Left(LCase(URL), 6)
                 Case "http:/", "https:", "ftp://", "rtsp:/", "mms://"
                       FormatRemoteUrl = URL
                       Exit Function
           End Select

           If Left(URL, 1) = "/" Then
                 FormatRemoteUrl = strUrl & URL
                 Exit Function
           End If
          
           If Left(URL, 3) = "../" Then
                 Dim ArrayUrl
                 Dim ArrayCurrentUrl
                 Dim ArrayTemp()
                 Dim strTemp
                 Dim i, n
                 Dim c, l
                 n = 0
                 ArrayCurrentUrl = Split(CurrentUrl, "/")
                 ArrayUrl = Split(URL, "../")
                 c = UBound(ArrayCurrentUrl)
                 l = UBound(ArrayUrl) + 1
                
                 If c > l + 2 Then
                       For i = 0 To c - l
                             ReDim Preserve ArrayTemp(n)
                             ArrayTemp(n) = ArrayCurrentUrl(i)
                             n = n + 1
                       Next
                       strTemp = Join(ArrayTemp, "/")
                 Else
                       strTemp = strUrl
                 End If
                 URL = Replace(URL, "../", vbNullString)
                 FormatRemoteUrl = strTemp & "/" & URL
                 Exit Function
           End If
          
           strUrl = Left(CurrentUrl, InStrRev(CurrentUrl, "/"))
           FormatRemoteUrl = strUrl & Replace(URL, "./", vbNullString)
           Exit Function
     End Function


共7页: 上一页 [1] [2] 3 [4] [5] [6] [7] 下一页
相 关 文 章   发布商链接
·ASP语言采用动网数据库制作显示IP图...
·如何在IIS中执行Python脚本的配置实...
·如何利用ASP把图片上传到数据库讲解...
·asp实现把文件存进access数据库并提...
·vbscript保留日期格式中的月份和日期...
·ASP脚本基础
·Asp实现人民币大小写转换代码
·ASP页面将数据库中检索数据生成到本...
·asp实现文件转移、压缩并直接存进数...
·ASP 实现压缩与解压代码
 §最新评论:(评论内容只代表网友观点,与本站立场无关!)
网名: 验证码:  【所有评论】【↑返回顶部
评 分: 12 345
评论内容:(不能超过500字,请自觉遵守互联网相关政策法规。[按 Ctrl+Enter 可直接提交]
注意:请勿在本站发布政治话题、色情及违反法律的内容。
IT知道网 声明:刊登此文章是为了传递更多信息,文章内容仅供参考,转载请注明出处。
推 荐 文 章
·vbscript保留日期格式中的月...
·asp实现把文件存进access数据
·如何利用ASP把图片上传到数据
·如何在IIS中执行Python脚本的
·ASP语言采用动网数据库制作显
·Asp技术实现数据导入进度状态
·asp防采集常用的六种方法
·asp动态生成RSS完整版代码
·ASP版+Ajax实现验证码通用模...
·asp中文数字验证码实现的代码
·VBS脚本调用系统的关机对话框
·ASP上传图片到数据库的代码
·ASP 实现压缩与解压代码
·asp实现文件转移、压缩并直接
·ASP页面将数据库中检索数据生
热 门 文 章
·VBS脚本调用系统的关机对话框...
·ASP版+Ajax实现验证码通用模...
·如何利用ASP把图片上传到数据...
·asp中文数字验证码实现的代码
·asp动态生成RSS完整版代码
·ASP上传图片到数据库的代码
·ASP页面将数据库中检索数据生...
·Asp实现人民币大小写转换代码
·asp实现文件转移、压缩并直接...
·ASP语言采用动网数据库制作显...
·asp实现把文件存进access数据...
·ASP脚本基础
·ASP 实现压缩与解压代码
·如何在IIS中执行Python脚本的...
·asp防采集常用的六种方法
网站首页 - 关于本站 - 加入收藏 - 网站地图 - 友情连接 - 在线留言 - 联系我们 - 返回顶部
Copyright © 2007 IT知道网.[冀ICP备07026896号]. All Rights Reserved .