`

JAVA调用VB6.0爬取网页

阅读更多

用VB6.0解决网页获取问题,在一些情况下可以省点事。

比如百度链接现在直接访问都302了,VB调用浏览器控件,可以绕开此类问题。

对于怼着一个网站爬,中间必须休眠的应用,干脆上VB。

核心代码如下。

 

' Module-level state: the sequence number parsed from the command line in
' Form_Load and used to build the output file name in
' WebBrowser1_DocumentComplete.
' Declared As Long (32-bit) rather than Integer (16-bit) so that counter
' values above 32767 passed by the caller do not raise an overflow error.
Public counter As Long


' Form start-up: parse the command line and start navigating.
'
' The command line is expected to have the form
'     <counter>丨丨丨<url>
' where <counter> is a number stored in the module-level variable "counter"
' and <url> is the page handed to the WebBrowser control.
'
' If the argument is missing, has no separator, has a non-numeric counter or
' an empty URL, the form unloads itself instead of raising a run-time error,
' so that a calling process waiting on this EXE is not left blocked behind an
' error dialog.
Private Sub Form_Load()
    ' Separator between the counter and the URL in the command line.
    Const ARG_SEPARATOR As String = "丨丨丨"

    Dim para As String          ' raw command-line argument
    Dim sepPos As Long          ' 1-based position of the separator in para
    Dim counterText As String   ' text to the left of the separator
    Dim targetUrl As String     ' text to the right of the separator

    On Error GoTo BadArguments

    ' Suppress script-error and other dialogs raised by the browser control.
    WebBrowser1.Silent = True

    ' Command$ returns an empty string when no argument was supplied.
    para = Command$

    ' sepPos = 0 means "separator not found"; sepPos = 1 means the counter
    ' part is empty. Both are unusable.
    sepPos = InStr(1, para, ARG_SEPARATOR)
    If sepPos <= 1 Then GoTo BadArguments

    ' Left part: the counter.
    counterText = Left$(para, sepPos - 1)
    If Not IsNumeric(counterText) Then GoTo BadArguments
    ' Implicit numeric conversion; an out-of-range value raises an error
    ' that is routed to BadArguments by the handler above.
    counter = counterText

    ' Right part: everything after the separator.
    targetUrl = Mid$(para, sepPos + Len(ARG_SEPARATOR))
    If Len(targetUrl) = 0 Then GoTo BadArguments

    WebBrowser1.Navigate targetUrl
    Exit Sub

BadArguments:
    ' Nothing sensible can be fetched: quit without showing a dialog.
    Unload Me
End Sub

' Fired by the WebBrowser control when a document has finished loading.
'
' When the event belongs to the control's own top-level object (pDisp Is
' WebBrowser1.Object), the body's HTML is shown in Text1, written to
' "d:\book\w<counter>.html", and the form is unloaded.
'
' Any failure while reading the document or writing the file (for example a
' missing output folder) is swallowed and the form is still unloaded, so the
' program never stops on a run-time error dialog. In that case no output file
' (or an incomplete one) is left behind for the caller to detect.
Private Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, url As Variant)
    Dim strData As String
    Dim nHandle As Integer
    Dim fName As String

    ' Ignore completion events that are not for the control's own object.
    If Not (pDisp Is WebBrowser1.Object) Then Exit Sub

    On Error GoTo Failed

    ' Grab the rendered markup of the page body.
    strData = WebBrowser1.Document.body.innerHtml
    Text1.Text = strData

    ' Write the markup to the output file named after the counter.
    fName = "d:\book\w" & counter & ".html"
    nHandle = FreeFile
    Open fName For Output As #nHandle
    Print #nHandle, strData

Cleanup:
    ' Best-effort cleanup: closing must not raise a second error.
    On Error Resume Next
    If nHandle <> 0 Then Close #nHandle
    ' Exit the program.
    Unload Me
    Exit Sub

Failed:
    ' Leave the error handler properly so that Cleanup runs with normal
    ' error handling re-enabled.
    Resume Cleanup
End Sub


' Keeps the browser control sized to the form's client area whenever the
' form is resized.
Private Sub Form_Resize()
    With WebBrowser1
        .Width = Me.ScaleWidth
        .Height = Me.ScaleHeight
    End With
End Sub

 

对于VB生成的EXE,用JAVA调用

 

/**
 * Launches the VB6-built executable that loads a web page in a browser
 * control and saves its HTML, then blocks until that executable exits.
 */
public class TestExe {

	/** Absolute path of the VB6-built executable to launch. */
	private static final String EXE_PATH = "D:\\软件\\VisualBasic6.0-SCqy\\Project1\\工程1.exe";

	/** Separator between the counter and the link in the single argument passed to the executable. */
	private static final String ARG_SEPARATOR = "丨丨丨";

	/**
	 * Demo entry point: fetches one sample link with counter 0.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) throws IOException, InterruptedException {
		String link = "http://www.baidu.com/link?url=tGeuyVVTLkLc_pbME3n_p8LMicqdmPWKuza0h3PQT1D7SMZLYkj_6Pxn-jBCw57jRcVRUBNrg5q4RzYMvIx8BAEEKHOGGMLTGYzGkdI_NhC";
		int counter = 0;
		callExe(link, counter);
	}

	/**
	 * Starts the executable with the argument {@code <counter>丨丨丨<link>} and
	 * waits for it to terminate, printing its liveness and exit code.
	 *
	 * <p>Failures are reported on standard error and not rethrown. If the
	 * waiting thread is interrupted, its interrupt status is restored so the
	 * caller can still observe the interruption.
	 *
	 * @param link    URL the executable should load
	 * @param counter number the executable uses to name its output
	 */
	public static void callExe(String link, int counter) {
		String[] cmds = { EXE_PATH, counter + ARG_SEPARATOR + link };
		try {
			Process process = new ProcessBuilder(cmds).start();
			// Liveness right after start (normally true while the page loads).
			System.out.println(process.isAlive());
			// Block until the executable has finished and exited.
			int exitVal = process.waitFor();
			System.out.println(exitVal);
			// Liveness after waitFor() has returned (false).
			System.out.println(process.isAlive());
		} catch (InterruptedException e) {
			// Do not swallow the interruption: restore the flag for the caller.
			Thread.currentThread().interrupt();
			System.err.println("Interrupted while waiting for " + EXE_PATH);
		} catch (Exception e) {
			// Print the exception itself: getMessage() alone may be null.
			System.err.println("Failed to run " + EXE_PATH + ": " + e);
		}
	}

}

 

 调用EXE后JAVA线程阻塞,EXE执行完之后退出程序,JAVA继续执行。

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics