用asp实现抓取土豆等网站视频、缩略图的程序

来源:互联网 时间:1970-01-01

用asp实现抓取土豆、优酷、酷6等网站视频、缩略图、标题的程序
----------------------------------------------------------------------

源文件说明:

index.html 演示界面
mt_parse.asp 分析视频网址的核心程序
mt_function.asp 自定义函数库
jquery.js 实现ajax的jquery框架
loading.gif 加载效果的GIF动画


本系统里面的所有URL均使用的相对路径,一般情况下无须更改即可正常运行

----------------------------------------------------------------------
程序运行环境:

WIN2000 + IIS5.0 或其它支持asp 3.0 与Microsoft.XMLHTTP组件的WEB服务器

[下载源文件]

核心文件:mt_function.asp

<%


'------------------------------------
' getSource(url)
'
' Fetches a URL with a synchronous GET through Microsoft.XMLHTTP and
' returns the response body converted to a VBScript string.
'
' Parameters:
'   url - address to request.
'
' Returns:
'   The decoded page text. If creating the component or sending the
'   request raises an error, the error description is returned instead
'   (callers looking for HTML markers simply find none).
'
' Decoding: bytes below &H80 are taken as single-byte characters; any
' other byte is combined with the byte that follows it into one
' double-byte character code passed to Chr().
' NOTE(review): this presumably relies on the server's ANSI code page
' being a double-byte one such as GB2312/GBK; pages served as UTF-8 would
' not decode correctly this way - confirm against the target sites.
'------------------------------------
Function getSource(url)
    ' Declared locally so the function cannot read or clobber same-named
    ' script-level variables (the calling page also uses "SourceCode").
    Dim oSend, SourceCode, strReturn
    Dim i, totalBytes, ThisCharCode, NextCharCode, failureText

    On Error Resume Next

    Set oSend = CreateObject("Microsoft.XMLHTTP")
    oSend.open "GET", url, False
    oSend.send()

    If Err.Number <> 0 Then
        ' Return the failure text right away; previously this value was
        ' assigned and then overwritten by the (empty) decoded body.
        failureText = Err.Description
        Err.Clear
        Set oSend = Nothing
        getSource = failureText
        Exit Function
    End If

    SourceCode = oSend.responseBody
    Set oSend = Nothing

    strReturn = ""
    totalBytes = LenB(SourceCode)
    i = 1
    Do While i <= totalBytes
        ThisCharCode = AscB(MidB(SourceCode, i, 1))
        If ThisCharCode < &H80 Then
            strReturn = strReturn & Chr(ThisCharCode)
        ElseIf i < totalBytes Then
            ' Lead byte followed by a trail byte: build one two-byte code.
            NextCharCode = AscB(MidB(SourceCode, i + 1, 1))
            strReturn = strReturn & Chr(CLng(ThisCharCode) * &H100 + CInt(NextCharCode))
            i = i + 1   ' the trail byte has been consumed
        End If
        ' A lead byte that is the very last byte has no partner and is dropped.
        i = i + 1
    Loop

    getSource = strReturn
End Function


'------------------------------------
' searchExp(strng, patrn)
'
' Runs a case-insensitive, global regular-expression search of patrn
' over strng and returns the text of the LAST match found.
' When nothing matches, the result is left unassigned (Empty), which
' compares equal to "" and has Len() = 0.
'------------------------------------
Function searchExp(strng,patrn)
    Dim matcher, hits, hit, lastHit

    Set matcher = New RegExp
    With matcher
        .Pattern = patrn
        .IgnoreCase = True      ' match regardless of letter case
        .Global = True          ' collect every match, not just the first
    End With

    Set hits = matcher.Execute(strng)
    For Each hit In hits
        lastHit = hit.Value     ' each later match overwrites the earlier one
    Next

    Set matcher = Nothing
    searchExp = lastHit
End Function

' replaceExp(sourceStr, patrn, rep)
'
' Case-insensitive regular-expression replace. The Global flag is not
' set, so only the FIRST match of patrn in sourceStr is replaced by rep.
' Returns the resulting string.
Function replaceExp(sourceStr,patrn,rep)
    Dim replacer, outcome

    Set replacer = New RegExp
    With replacer
        .Pattern = patrn
        .IgnoreCase = True      ' match regardless of letter case
    End With

    outcome = replacer.Replace(sourceStr, rep)
    Set replacer = Nothing

    replaceExp = outcome
End Function

'------------------------------------
' random(MaxNum, MinNum)
'
' Re-seeds the generator with Randomize and returns
' Int((MaxNum - MinNum - 1) * Rnd + MinNum + 1).
' Because Rnd is in [0, 1), integer arguments yield a whole number from
' MinNum + 1 up to MaxNum - 1, i.e. both bounds are excluded.
' Note the argument order: the maximum comes first.
'------------------------------------
function random(MaxNum,MinNum)
    Randomize
    random = Int((MaxNum - MinNum - 1) * Rnd + MinNum + 1)
end function
%>

mt_parse.asp

<!--#include file="mt_function.asp" --><%
' mt_parse.asp
'
' Reads the "url" request parameter, downloads that page, and emits a
' JSON object (wrapped as JSONP when a "callback" parameter is given)
' holding the page title, the possibly rewritten page URL, the player /
' data URL and the thumbnail URL.
'
' status values written to the output:
'   0 - no title could be extracted (not a usable URL)
'   2 - a title was found but no thumbnail (ordinary web page)
'   1 - title and thumbnail found (video page)
'
' The closing-tag patterns below use a plain "/" and the pipe patterns
' use "\|". The published listing had "//" and "/|" in those places,
' which could never match real markup, so every request ended with
' status 0.
' NOTE(review): "\|" is the presumed original for "/|" (a literal pipe
' separating fields in the youku download link) - confirm against a live
' page.
on error resume next
Response.Charset = "GB2312"
dim str
dim title
dim status
dim detailurl
dim url
dim picurls
dim callback
dim SourceCode

' Escapes a value for use inside a double-quoted JSON string. "<" and ">"
' are written as \u003c / \u003e so that scraped text cannot inject markup
' if the response is ever rendered as HTML.
Function jsonEscape(value)
    Dim s
    s = value & ""
    s = Replace(s, "\", "\\")
    s = Replace(s, """", "\""")
    s = Replace(s, vbCrLf, "\n")
    s = Replace(s, vbCr, "\n")
    s = Replace(s, vbLf, "\n")
    s = Replace(s, vbTab, "\t")
    s = Replace(s, "<", "\u003c")
    s = Replace(s, ">", "\u003e")
    jsonEscape = s
End Function

url = request("url")
' Only proceed when the parameter looks like an http(s) address.
' NOTE(review): any reachable address is fetched server-side; consider
' restricting the allowed hosts.
if len(searchExp(url,"htt(.*?)://(.*?)"))>0 then
    ' Decide which site the address belongs to.
    if InStr(url,"tudou") then
        ' Tudou, e.g. http://www.tudou.com/programs/view/ndDDPrFOPsw/
        if InStr(url,"programs")=false then
            ' Rewrite only the "/v/" path segment; replacing the first
            ' letter "v" anywhere would corrupt hosts such as v.tudou.com.
            url = replaceExp(url,"/v/","/programs/view/")
        end if
        SourceCode = getSource(url)

        ' Thumbnail: contents of <span class="s_pic">...</span>
        str = searchExp(SourceCode,"<span class=.s_pic.>(.*?)</span>")
        str = replaceExp(str,"<span class=.s_pic.>","")
        picurls = replaceExp(str,"</span>","")

        ' Player / data URL
        detailurl = replaceExp(url,"programs/view","v")

    elseif InStr(url,"youku") then
        ' Youku
        'http://player.youku.com/player.php/sid/XMTMxMTgzMzA4=/v.swf
        'http://static.youku.com/v1.0.0064/v/swf/qplayer.swf?VideoIDS=XMTMxMTgzMzA4=&embedid=-&showAd=0

        Response.Charset = "utf-8"
        if InStr(url,"v_show")=false then
            str = replaceExp(url,"player.youku.com/player.php/sid/","v.youku.com/v_show/id_")
            url = replaceExp(str,"=/v.swf",".html")
        end if

        SourceCode = getSource(url)
        ' Thumbnail: last pipe-separated field of the download link's href
        str = searchExp(SourceCode,"<a charset(.*?) id=.download(.*?)\|.>(.*)</a>")
        str = replaceExp(str,"<a (.*?) href=.","")
        str = replaceExp(str,"\|.>(.*)</a>","")
        picurls = replaceExp(str,"(.*)\|","")

        ' Player / data URL: value of <input id="link" ... />
        str = searchExp(SourceCode,"<input(.*?)id=""link""(.*?)/>")
        str = replaceExp(str,"<input(.*?)value=""","")
        detailurl = replaceExp(str,"""(.*)","")

    elseif InStr(url,"v.blog.sohu.com") then
        ' Sohu, e.g. http://v.blog.sohu.com/fo/v4/2873152
        if InStr(url,"u/vw")=false then
            str = replaceExp(url,"v.blog.sohu.com/(.*?)/(.*?)/","v.blog.sohu.com/u/vw/")
            url = replaceExp(str,"/v.swf",".html")
        end if
        SourceCode = getSource(url)
        ' Thumbnail: fixed placeholder image
        picurls = "/Article/UploadFiles/201101/20110112170637438.gif"
        ' Player / data URL: value of <input id="iptVideoFlash" ... />
        str = searchExp(SourceCode,"<input(.*?)id=""iptVideoFlash""(.*?)/>")
        str = replaceExp(str,"<input(.*?)value=""","")
        detailurl = replaceExp(str,"""(.*)","")

    elseif InStr(url,"ku6") then
        ' Ku6
        if InStr(url,"v.ku6.com/show")=false then
            str = replaceExp(url,"player.ku6.com/refer","v.ku6.com/show")
            url = replaceExp(str,"/v.swf",".html")
        end if
        SourceCode = getSource(url)

        ' Thumbnail: contents of <span class="s_pic">...</span>
        str = searchExp(SourceCode,"<span class=.s_pic.>(.*?)</span>")
        str = replaceExp(str,"<span class=.s_pic.>","")
        picurls = replaceExp(str,"</span>","")
        ' SWF player URL derived from the page URL
        str = replaceExp(url,"v.ku6.com/show","player.ku6.com/refer")
        detailurl = replaceExp(str,".html","/v.swf")

    else
        SourceCode = getSource(url)
    end if

    ' Page title
    str = searchExp(SourceCode,"<title>(.*?)</title>")
    str = replaceExp(str,"<title>","")
    title = replaceExp(str,"</title>","")
end if

if title="" then
    ' Not a usable URL
    status=0
elseif picurls="" then
    ' Ordinary web page
    status=2
else
    ' Video page
    status =1
end if

callback=request("callback")
if callback<>"" then
    ' Accept only identifier-like callback names; anything else is dropped
    ' so arbitrary script cannot be reflected into the response.
    if len(searchExp(callback,"^[a-z0-9_$.]+$"))=0 then
        callback = ""
    end if
end if
if callback<>"" then
    response.write callback
    response.write "("
end if
%>{"site":"miantuan.net","keywords":"面团网","status":"<%=status %>","item":{"title":"<%=jsonEscape(title) %>","url":"<%=jsonEscape(url) %>","objecturl":"<%=jsonEscape(detailurl) %>","spicurl":"<%=jsonEscape(picurls) %>"}}<%
if callback<>"" then
    response.write ")"
end if
%>

相关阅读:
Top