使用java采集京东商城行政区划数据示例_java

分享到:


package com.test.html;

import com.alibaba.fastjson.JSON;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
public class JD_Address
{
    private static int timeout = 300000;
    private static int count = 0;

    public static void main(String[] args) throws IOException, InterruptedException
    {
        String province_url = "http://trade.jd.com/dynamic/consignee/getProvinces.action";
        String city_url = "http://trade.jd.com/dynamic/consignee/getCitys.action?consigneeParam.provinceId=";
        String county_url = "http://trade.jd.com/dynamic/consignee/getCountys.action?consigneeParam.cityId=";
        String town_url = "http://trade.jd.com/dynamic/consignee/getTowns.action?consigneeParam.countyId=";

        //getList(town_url + "4139");
        List<Address> provinces = getList(province_url);
        for (Address province : provinces)
        {
            List<Address> citys = getList(city_url + province.getId());
            province.setChildren(citys);

            for (Address city : citys)
            {
                List<Address> countys = getList(county_url + city.getId());
                city.setChildren(countys);
                for (Address county : countys)
                {
                    List<Address> towns = getList(town_url + county.getId());
                    county.setChildren(towns);
                }
            }
        }
        System.out.println("=======");
        FileUtils.writeStringToFile(new File("保存位置/address2.js"), "var ds=" + JSON.toJSONString(provinces));
    }

    private static List<Address> getList(String url) throws IOException, InterruptedException
    {
        List<Address> list = new ArrayList<Address>();
        Document doc = Jsoup.parse(new URL(url), timeout);
        Elements elements = doc.select("option[value~=\\d+]");
        for (Element element : elements)
        {
            Address address = new Address();
            address.setName(element.text().replace("*", StringUtils.EMPTY));
            address.setId(element.attr("value"));
            list.add(address);

            //System.out.println(address.getName());
        }
        ++count;
        if (count % 100 == 0)
        {
            System.out.println(count);
        }
        if (count % 500 == 0)
        {
            Thread.sleep(5000);
        }
        return list;
    }
}

class Address
{
    private String name;
    private String id;
    private List<Address> children;

    public String getName()
    {
        return name;
    }
    public void setName(String name)
    {
        this.name = name;
    }
    public List<Address> getChildren()
    {
        return children;
    }
    public void setChildren(List<Address> children)
    {
        this.children = children;
    }
    public String getId()
    {
        return id;
    }
    public void setId(String id)
    {
        this.id = id;
    }
}

[JavaScript]代码


[
        {"children": [
            {"children": [
                {"id": "2799", "name": "三环以内"},
                {"id": "2819", "name": "三环到四环之间"},
                {"id": "2839", "name": "四环到五环之间"},
                {"id": "2840", "name": "五环到六环之间"},
                {"id": "4137", "name": "管庄"},
                {"id": "4139", "name": "北苑"},
                {"id": "4211", "name": "定福庄"}
            ], "id": "72", "name": "朝阳区"},
            {"children": [
                {"id": "2848", "name": "三环以内"},
                {"id": "2849", "name": "三环到四环之间"},
                {"id": "2850", "name": "四环到五环之间"},
                {"id": "2851", "name": "五环到六环之间"},
                {"id": "2852", "name": "六环以外"},
                {"id": "4134", "name": "西三旗"},
                {"id": "4209", "name": "西二旗"}
            ], "id": "2800", "name": "海淀区"},
            {"children": [
                {"id": "2827", "name": "内环到二环里"},
                {"id": "2853", "name": "二环到三环"}
            ], "id": "2801", "name": "西城区"},
            {"children": [
                {"id": "2821", "name": "内环到三环里"}
            ], "id": "2802", "name": "东城区"},
            {"children": [
                {"id": "2829", "name": "一环到二环"},
                {"id": "2842", "name": "二环到三环"}
            ], "id": "2803", "name": "崇文区"},
            {"children": [
                {"id": "2828", "name": "内环到三环里"}
            ], "id": "2804", "name": "宣武区"},
            {"children": [
                {"id": "2832", "name": "四环到五环之间"},
                {"id": "2854", "name": "二环到三环"},
                {"id": "2855", "name": "三环到四环之间"},
                {"id": "34544", "name": "五环到六环之间"},
                {"id": "34545", "name": "六环之外"}
            ], "id": "2805", "name": "丰台区"},
            {"children": [
                {"id": "2831", "name": "四环到五环内"},
                {"id": "4187", "name": "石景山城区"},
                {"id": "4188", "name": "八大处科技园区"}
            ], "id": "2806", "name": "石景山区"},
            {"children": [
                {"id": "6491", "name": "城区以内"},
                {"id": "2843", "name": "郊区"}
            ], "id": "2807", "name": "门头沟"},
            {"children": [
                {"id": "6492", "name": "城区以内"},
                {"id": "2844", "name": "郊区"}
            ], "id": "2808", "name": "房山区"},
            {"children": [
                {"id": "4175", "name": "五环到六环之间"},
                {"id": "37643", "name": "六环以外(于家务乡)"},
                {"id": "51150", "name": "六环以外(张家湾镇、台湖镇、

昵    称:
验证码: