首页 ┆ 网站地图 ┆ 在线留言 ┆ 游戏资讯 ┆ 资源下载 ┆ 端午节祝福 ┆ 迅雷在线影视
设为首页
加入收藏
联系我们
高级搜索
您当前的位置: 主页>ASP专区>ASP教学>Asp开发的可以用来做采集与分析html代码的类
Asp开发的可以用来做采集与分析html代码的类
来源: 发布时间:2008-06-25 发布人: 浏览: 人次   字体: [ ]  

     '================================================
     '函数名:CheckRemoteUrl
     '作 用: 判断远程URL
     '================================================
     Public Function CheckHTTP(ByVal URL)
           Dim Retrieval
          
           On Error Resume Next
           Set Retrieval = CreateObject("MSXML2.XMLHTTP")
           With Retrieval
                 .Open "HEAD", URL, False
                 .send
                 If .readyState <> 4 Then
                       CheckHTTP = False
                       Set Retrieval = Nothing
                       Exit Function
                 End If
                 If .Status < 300 Then
                       CheckHTTP = True
                       Set Retrieval = Nothing
                       Exit Function
                 Else
                       CheckHTTP = False
                       Set Retrieval = Nothing
                       Exit Function
                 End If
           End With
           If Err.Number <> 0 Then
                 CheckHTTP = False
                 Err.Clear
                 Set Retrieval = Nothing
                 Exit Function
           End If
           Set Retrieval = Nothing
           Exit Function
     End Function
     '================================================
     '函数名:GetHTTPPage
     '作 用:获取HTTP页
     '参 数:url   ----远程URL
     '返回值:远程HTML代码
     '================================================
     Public Function GetRemoteData(ByVal URL, ByVal Cset)
           If Len(Cset) < 2 Then Cset = "GB2312"
          
           Dim strHeader
           Dim l
          
           On Error Resume Next
          
           Dim Retrieval
           Dim ObjStream
           Set ObjStream = CreateObject("ADODB.Stream")
           ObjStream.Type = 1
           ObjStream.Mode = 3
           ObjStream.Open
           Set Retrieval = CreateObject("MSXML2.XMLHTTP")
           With Retrieval
                 .Open "GET", URL, False
                 .setRequestHeader "Referer", URL
                 .send
                 If .readyState <> 4 Then Exit Function
                 If .Status > 300 Then Exit Function
                 '--获取目标网站文件头
                 strHeader = .getResponseHeader("Content-Type")
                 strHeader = UCase(strHeader)
                 ObjStream.Write (.responseBody)
           End With
           Set Retrieval = Nothing
          
           If Len(strHeader) > 0 Then
                 '--获取目标文件编码
                 l = InStrRev(strHeader, "CHARSET=", -1, 1)
                 If l > 0 Then
                       Cset = Right(strHeader, Len(strHeader) - l - 7)
                 Else
                       Cset = Cset
                 End If
           End If

           ObjStream.Position = 0
           ObjStream.Type = 2
           ObjStream.Charset = Trim(Cset)
           GetRemoteData = ObjStream.ReadText
           ObjStream.Close
           Set ObjStream = Nothing
           Exit Function
     End Function
     '================================================
     '函数名:FindMatch
     '作 用:截取相匹配的内容
     '返回值:截取后的字符串
     '================================================
     Public Function FindMatch(ByVal str, ByVal start, ByVal last)
          
           Dim Match
           Dim s
           Dim FilterStr
           Dim MatchStr
           Dim strContent
           Dim ArrayFilter()
           Dim i, n
           Dim bRepeat
          
           If Len(start) = 0 Or Len(last) = 0 Then Exit Function
          
           On Error Resume Next
          
           MatchStr = "(" & CorrectPattern(start) & ")(.+?)(" & CorrectPattern(last) & ")"
          
           Dim re
           Set re = New RegExp
           re.IgnoreCase = True
           re.Global = True
           re.Pattern = MatchStr
           Set s = re.Execute(str)
           n = 0
           For Each Match In s
                 If n = 0 Then
                       n = n + 1
                       ReDim ArrayFilter(n)
                       ArrayFilter(n) = Match
                 Else
                       bRepeat = False
                       For i = 0 To UBound(ArrayFilter)
                             If UCase(Match) = UCase(ArrayFilter(i)) Then
                                   bRepeat = True
                                   Exit For
                             End If
                       Next
                       If bRepeat = False Then
                             n = n + 1
                             ReDim Preserve ArrayFilter(n)
                             ArrayFilter(n) = Match
                       End If
                 End If
           Next
          
           Set s = Nothing
           Set re = Nothing
          
           strContent = Join(ArrayFilter, "|||")
           strContent = Replace(strContent, start, "")
           strContent = Replace(strContent, last, "")
          
           FindMatch = Replace(strContent, "|||", vbNullString, 1, 1)
           Exit Function
     End Function
     '================================================
     '函数名:CutFixed
     '作 用:截取固定的字符串
     '参 数:strHTML   ----原字符串
     '     start ------ 开始字符串
     '     last ------ 结束字符串
     '================================================
     Public Function CutFixed(ByVal strHTML, ByVal start, ByVal last)
           Dim s
           Dim Match
           Dim strPattern
           Dim strContent
           Dim t, l

           t = Len(start): l = Len(last)
           If t = 0 Or l = 0 Then Exit Function
           strPattern = "(" & CorrectPattern(start) & ")(.+?)(" & CorrectPattern(last) & ")"
           On Error Resume Next

           Dim re
           Set re = New RegExp
           re.IgnoreCase = False
           re.Global = False
           re.Pattern = strPattern

           Set s = re.Execute(strHTML)
           For Each Match In s
                 strContent = Match.Value
           Next

           Set s = Nothing
           Set re = Nothing
           CutFixed = Mid(strContent, t + 1, Len(strContent) - l - t)
           Exit Function
     End Function
     '================================================
     '函数名:CutFixate
     '返回值:截取后的字符串
     '================================================
     Public Function CutFixate(ByVal strHTML, ByVal start, ByVal last)
          
           Dim s
           Dim Match
           Dim strPattern
           Dim strContent
           Dim t, l

           t = Len(start): l = Len(last)
           If t = 0 Or l = 0 Then Exit Function

           strPattern = "(" & CorrectPattern(start) & ")(.+?)(" & CorrectPattern(last) & ")"

           On Error Resume Next

           Dim re
           Set re = New RegExp
           re.IgnoreCase = False
           re.Global = False
           re.Pattern = strPattern

           Set s = re.Execute(strHTML)
           For Each Match In s
                 strContent = Match.Value
           Next

           Set s = Nothing
           Set re = Nothing
          
           CutFixate = Trim(strContent)
           Exit Function
     End Function


共7页: 上一页 [1] 2 [3] [4] [5] [6] [7] 下一页
相 关 文 章   发布商链接
·ASP语言采用动网数据库制作显示IP图...
·如何在IIS中执行Python脚本的配置实...
·如何利用ASP把图片上传到数据库讲解...
·asp实现把文件存进access数据库并提...
·vbscript保留日期格式中的月份和日期...
·ASP脚本基础
·Asp实现人民币大小写转换代码
·ASP页面将数据库中检索数据生成到本...
·asp实现文件转移、压缩并直接存进数...
·ASP 实现压缩与解压代码
 §最新评论:(评论内容只代表网友观点,与本站立场无关!)
网名: 验证码:  【所有评论】【↑返回顶部
评 分: 12 345
评论内容:(不能超过500字,请自觉遵守互联网相关政策法规。[按 Ctrl+Enter 可直接提交]
注意:请勿在本站发布政治话题、色情及违反法律的内容。
IT知道网 声明:刊登此文章是为了传递更多信息,文章内容仅供参考,转载请注明出处。
推 荐 文 章
·vbscript保留日期格式中的月...
·asp实现把文件存进access数据
·如何利用ASP把图片上传到数据
·如何在IIS中执行Python脚本的
·ASP语言采用动网数据库制作显
·Asp技术实现数据导入进度状态
·asp防采集常用的六种方法
·asp动态生成RSS完整版代码
·ASP版+Ajax实现验证码通用模...
·asp中文数字验证码实现的代码
·VBS脚本调用系统的关机对话框
·ASP上传图片到数据库的代码
·ASP 实现压缩与解压代码
·asp实现文件转移、压缩并直接
·ASP页面将数据库中检索数据生
热 门 文 章
·VBS脚本调用系统的关机对话框...
·ASP版+Ajax实现验证码通用模...
·如何利用ASP把图片上传到数据...
·asp中文数字验证码实现的代码
·asp动态生成RSS完整版代码
·ASP上传图片到数据库的代码
·ASP页面将数据库中检索数据生...
·Asp实现人民币大小写转换代码
·asp实现文件转移、压缩并直接...
·ASP语言采用动网数据库制作显...
·asp实现把文件存进access数据...
·ASP脚本基础
·ASP 实现压缩与解压代码
·如何在IIS中执行Python脚本的...
·asp防采集常用的六种方法
网站首页 - 关于本站 - 加入收藏 - 网站地图 - 友情连接 - 在线留言 - 联系我们 - 返回顶部
Copyright © 2007 IT知道网.[冀ICP备07026896号]. All Rights Reserved .