首页 ┆ 网站地图 ┆ 在线留言 ┆ 游戏资讯 ┆ 资源下载 ┆ 端午节祝福 ┆ 迅雷在线影视
设为首页
加入收藏
联系我们
高级搜索
您当前的位置: 主页>ASP专区>ASP教学>Asp开发的可以用来做采集与分析html代码的类
Asp开发的可以用来做采集与分析html代码的类
来源: 发布时间:2008-06-25 发布人: 浏览: 人次   字体: [ ]  

     '================================================
     '函数名:RemoteToLocal
     '作 用:替换字符串中的远程文件为本地文件并保存远程文件
     '参 数:
     '     sHTML     : 要替换的字符串
     '     sExt     : 执行替换的扩展名
     '================================================
     Public Function RemoteToLocal(ByVal sHTML, ByVal strPath)
           Dim s_Content
           Dim re
           Dim RemoteFile
           Dim RemoteFileUrl
           Dim SaveFileName
           Dim SaveFileType
           Dim a_RemoteUrl()
           Dim n
           Dim i
           Dim l
           Dim bRepeat
           Dim nFileNum
           Dim sContentPath
           s_Content = sHTML
          
           On Error Resume Next
          
           Set re = New RegExp
           re.IgnoreCase = True
           re.Global = True
           re.Pattern = "((http|https|ftp|rtsp|mms):(\/\/|\\\\){1}(([A-Za-z0-9_-])+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})(\S*\/)((\S)+[.]{1}(" & sAllowExtName & ")))"
           Set RemoteFile = re.Execute(s_Content)
           n = 0
           '---- 转入无重复数据
           For Each RemoteFileUrl In RemoteFile
                 If n = 0 Then
                       n = n + 1
                       ReDim a_RemoteUrl(n)
                       a_RemoteUrl(n) = RemoteFileUrl
                 Else
                       bRepeat = False
                       For i = 1 To UBound(a_RemoteUrl)
                             If UCase(RemoteFileUrl) = UCase(a_RemoteUrl(i)) Then
                                   bRepeat = True
                                   Exit For
                             End If
                       Next
                       If bRepeat = False Then
                             n = n + 1
                             ReDim Preserve a_RemoteUrl(n)
                             a_RemoteUrl(n) = RemoteFileUrl
                       End If
                 End If
           Next
           Set RemoteFile = Nothing
           Set re = Nothing
           If n = 0 Then
                 PathFileName = ""
                 RemoteToLocal = s_Content
                 Exit Function
           End If
           '---- 开始替换操作
           Dim UploadPath
           l = InStrRev(strPath, "UploadPic", -1)
           UploadPath = Right(strPath, Len(strPath) - l + 1)
          
           nFileNum = 0
           For i = 1 To n
                 SaveFileType = Mid(a_RemoteUrl(i), InStrRev(a_RemoteUrl(i), ".") + 1)
                 SaveFileName = GetRndFileName(SaveFileType)
                 If SaveRemoteFile(strPath & SaveFileName, a_RemoteUrl(i)) = True Then
                       nFileNum = nFileNum + 1
                       If nFileNum > 0 Then
                             PathFileName = PathFileName & "|"
                       End If
                       PathFileName = PathFileName & UploadPath & SaveFileName
                       s_Content = Replace(s_Content, a_RemoteUrl(i), strPath & SaveFileName, 1, -1, 1)
                 End If
           Next
           RemoteToLocal = s_Content
           Exit Function
     End Function
     Public Function FormatUrl(ByVal str)
           If Not IsNull(str) And Trim(str) <> "" And LCase(str) <> "http://" And Len(str) < 255 Then
                 str = Trim(Replace(Replace(Replace(Replace(str, vbNewLine, ""), Chr(9), ""), Chr(39), ""), Chr(34), ""))
                 If InStr(str, "://") > 0 Then
                       FormatUrl = str
                 Else
                       FormatUrl = "http://" & str
                 End If
           Else
                 FormatUrl = ""
           End If
     End Function
     '--内容过滤
     Public Function Html2Ubb(ByVal strContent, ByVal sRemoveCode)
           On Error Resume Next
           If Len(strContent) > 0 Then
                 Dim ArrayCodes
                 Dim re
                 Set re = New RegExp
                 If Len(sRemoveCode) < 21 Then sRemoveCode = "1|1|0|0|0|0|0|0|0|0|0|0"
                 ArrayCodes = Split(sRemoveCode, "|")
                
                 re.IgnoreCase = True
                 re.Global = True
                
                 '--清除script脚本
                 If CInt(ArrayCodes(0)) = 1 Then
                       re.Pattern = "(<s+cript(.+?)<\/s+cript>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有iframe框架
                 If CInt(ArrayCodes(1)) = 1 Then
                       re.Pattern = "(<iframe(.+?)<\/iframe>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有object对象
                 If CInt(ArrayCodes(2)) = 1 Then
                       re.Pattern = "(<object(.+?)<\/object>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有java applet
                 If CInt(ArrayCodes(3)) = 1 Then
                       re.Pattern = "(<applet(.+?)<\/applet>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有div标签
                 If CInt(ArrayCodes(4)) = 1 Then
                       re.Pattern = "(<DIV>)|(<DIV(.+?)>)"
                       strContent = re.Replace(strContent, "")
                       re.Pattern = "(<\/DIV>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有font标签
                 If CInt(ArrayCodes(5)) = 1 Then
                       re.Pattern = "(<FONT>)|(<FONT(.+?)>)"
                       strContent = re.Replace(strContent, "")
                       re.Pattern = "(<\/FONT>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有span标签
                 If CInt(ArrayCodes(6)) = 1 Then
                       re.Pattern = "(<SPAN>)|(<SPAN(.+?)>)"
                       strContent = re.Replace(strContent, "")
                       re.Pattern = "(<\/SPAN>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有A标签
                 If CInt(ArrayCodes(7)) = 1 Then
                       re.Pattern = "(<A>)|(<A(.+?)>)"
                       strContent = re.Replace(strContent, "")
                       re.Pattern = "(<\/A>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有img标签
                 If CInt(ArrayCodes(8)) = 1 Then
                       re.Pattern = "(<IMG(.+?)>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有FORM标签
                 If CInt(ArrayCodes(9)) = 1 Then
                       re.Pattern = "(<FORM>)|(<FORM(.+?)>)"
                       strContent = re.Replace(strContent, "")
                       re.Pattern = "(<\/FORM>)"
                       strContent = re.Replace(strContent, "")
                 End If
                 '--清除所有HTML标签
                 If CInt(ArrayCodes(10)) = 1 Then
                       re.Pattern = "<(.[^>]*)>"
                       strContent = re.Replace(strContent, "")
                 End If
                 re.Pattern = "(" & Chr(8) & "|" & Chr(9) & "|" & Chr(10) & "|" & Chr(13) & ")"
                 strContent = re.Replace(strContent, vbNullString)
                 re.Pattern = "(<!--(.+?)-->)"
                 strContent = re.Replace(strContent, vbNullString)
                 re.Pattern = "(<TBODY>)"
                 strContent = re.Replace(strContent, "")
                 re.Pattern = "(<\/TBODY>)"
                 strContent = re.Replace(strContent, "")
                 re.Pattern = "(<" & Chr(37) & ")"
                 strContent = re.Replace(strContent, "<%")
                 re.Pattern = "(" & Chr(37) & ">)"
                 strContent = re.Replace(strContent, "%>")
                 Set re = Nothing
                 Html2Ubb = strContent
           Else
                 Html2Ubb = ""
           End If
           Exit Function
     End Function

共7页: 上一页 [1] [2] [3] [4] 5 [6] [7] 下一页
相 关 文 章   发布商链接
·ASP语言采用动网数据库制作显示IP图...
·如何在IIS中执行Python脚本的配置实...
·如何利用ASP把图片上传到数据库讲解...
·asp实现把文件存进access数据库并提...
·vbscript保留日期格式中的月份和日期...
·ASP脚本基础
·Asp实现人民币大小写转换代码
·ASP页面将数据库中检索数据生成到本...
·asp实现文件转移、压缩并直接存进数...
·ASP 实现压缩与解压代码
 §最新评论:(评论内容只代表网友观点,与本站立场无关!)
网名: 验证码:  【所有评论】【↑返回顶部
评 分: 12 345
评论内容:(不能超过500字,请自觉遵守互联网相关政策法规。[按 Ctrl+Enter 可直接提交]
注意:请勿在本站发布政治话题、色情及违反法律的内容。
IT知道网 声明:刊登此文章是为了传递更多信息,文章内容仅供参考,转载请注明出处。
推 荐 文 章
·vbscript保留日期格式中的月...
·asp实现把文件存进access数据
·如何利用ASP把图片上传到数据
·如何在IIS中执行Python脚本的
·ASP语言采用动网数据库制作显
·Asp技术实现数据导入进度状态
·asp防采集常用的六种方法
·asp动态生成RSS完整版代码
·ASP版+Ajax实现验证码通用模...
·asp中文数字验证码实现的代码
·VBS脚本调用系统的关机对话框
·ASP上传图片到数据库的代码
·ASP 实现压缩与解压代码
·asp实现文件转移、压缩并直接
·ASP页面将数据库中检索数据生
热 门 文 章
·VBS脚本调用系统的关机对话框...
·ASP版+Ajax实现验证码通用模...
·如何利用ASP把图片上传到数据...
·asp中文数字验证码实现的代码
·asp动态生成RSS完整版代码
·ASP上传图片到数据库的代码
·ASP页面将数据库中检索数据生...
·Asp实现人民币大小写转换代码
·asp实现文件转移、压缩并直接...
·ASP语言采用动网数据库制作显...
·asp实现把文件存进access数据...
·ASP脚本基础
·ASP 实现压缩与解压代码
·如何在IIS中执行Python脚本的...
·asp防采集常用的六种方法
网站首页 - 关于本站 - 加入收藏 - 网站地图 - 友情连接 - 在线留言 - 联系我们 - 返回顶部
Copyright © 2007 IT知道网.[冀ICP备07026896号]. All Rights Reserved .