1,获取一级及二级商品分类信息
给定一个网页,获取该网页上商品信息的分类
using Skay.WebBot;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Windows.Forms;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Data.SqlClient;
namespace catchGoods
{
    /// <summary>
    /// Main form of the scraper. Clicking the button downloads the shop's home
    /// page on a worker thread, lists every top-level category in
    /// <c>listBox1</c>, and stores the top-level and second-level categories
    /// in the <c>exerciseOneSort</c> and <c>exerciseTwoSort</c> tables.
    /// </summary>
    public partial class Form1 : Form
    {
        // NOTE(review): the credentials are hard-coded in source; they should
        // be moved to configuration (app.config / protected settings).
        private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

        /// <summary>Page whose category menu is scraped.</summary>
        private const string SiteUrl = "http://www.htluxe.com";

        private const string InsertCategorySql =
            "insert into exerciseOneSort (className, remark) values (@className, @remark)";

        private const string InsertSubCategorySql =
            "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)";

        /// <summary>Worker thread started by the most recent button click.</summary>
        public static Thread th;

        /// <summary>Creates the form and its designer-generated controls.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Starts the scrape on a separate thread so the UI thread is not
        /// blocked by the download and the database inserts.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            th = new Thread(GetJDData);
            th.Start();
        }

        /// <summary>
        /// Downloads <see cref="SiteUrl"/>, walks every <c>.categroup dl</c>
        /// element and, for each one, shows the category name in the list box,
        /// inserts it into <c>exerciseOneSort</c>, and inserts each of its
        /// <c>dt p a</c> links into <c>exerciseTwoSort</c>.
        /// </summary>
        /// <remarks>
        /// Intended to run on a worker thread: it calls <c>Invoke</c> to touch
        /// the list box. Categories whose link carries no <c>=</c>-separated
        /// value are skipped, because that value is the key that ties the
        /// second-level rows (<c>idplus</c>) to their parent.
        /// </remarks>
        public void GetJDData()
        {
            HttpUtility http = new HttpUtility();
            string html = http.GetHtmlText(SiteUrl);
            var document = new JumonyParser().Parse(html);

            // The using blocks guarantee the connection and both commands are
            // released even when an insert throws.
            using (SqlConnection conn = new SqlConnection(ConnectionString))
            using (SqlCommand insertCategory = new SqlCommand(InsertCategorySql, conn))
            using (SqlCommand insertSubCategory = new SqlCommand(InsertSubCategorySql, conn))
            {
                // Parameters keep scraped text out of the SQL statement itself,
                // so quotes in a category name can neither break nor alter it.
                insertCategory.Parameters.Add("@className", SqlDbType.NVarChar);
                insertCategory.Parameters.Add("@remark", SqlDbType.NVarChar);
                insertSubCategory.Parameters.Add("@className", SqlDbType.NVarChar);
                insertSubCategory.Parameters.Add("@url", SqlDbType.NVarChar);
                insertSubCategory.Parameters.Add("@idplus", SqlDbType.NVarChar);

                conn.Open();

                foreach (var item in document.Find(".categroup dl"))
                {
                    var heading = item.FindFirst("h4 a");
                    string name = heading.InnerText() ?? string.Empty;
                    string remark = ExtractRemark(heading.Attribute("href").Value());
                    if (remark == null)
                    {
                        // No "=value" part in the link: nothing to key the
                        // sub-categories on, so skip this category.
                        continue;
                    }

                    // Controls may only be touched from the thread that created
                    // them, so the list box update is marshalled through Invoke.
                    this.Invoke((EventHandler)(delegate
                    {
                        listBox1.Items.Add(name);
                    }));

                    insertCategory.Parameters["@className"].Value = name;
                    insertCategory.Parameters["@remark"].Value = remark;
                    insertCategory.ExecuteNonQuery();

                    foreach (var element in item.Find("dt p a"))
                    {
                        string nameTwo = element.InnerText() ?? string.Empty;
                        string url = "http://www.htluxe.com/" + element.Attribute("href").Value();

                        insertSubCategory.Parameters["@className"].Value = nameTwo;
                        insertSubCategory.Parameters["@url"].Value = url;
                        insertSubCategory.Parameters["@idplus"].Value = remark;
                        insertSubCategory.ExecuteNonQuery();
                    }
                }
            }
        }

        /// <summary>
        /// Returns the text between the first and second <c>=</c> of
        /// <paramref name="href"/> (the whole tail when there is only one
        /// <c>=</c>), or <c>null</c> when the link is empty or has no <c>=</c>.
        /// </summary>
        private static string ExtractRemark(string href)
        {
            if (string.IsNullOrEmpty(href))
            {
                return null;
            }

            string[] parts = href.Split('=');
            return parts.Length > 1 ? parts[1] : null;
        }
    }
}
原文:http://www.cnblogs.com/Tinamei/p/5162163.html