商品詳情數據是電商分析的核心基礎,包含價格、規格、庫存、促銷等關鍵信息。本文將系統講解京東商品詳情接口的技術實現,重點解決動態參數構造、多維度數據提取、反爬機制應對等核心問題,提供一套合規高效的技術方案,同時嚴格遵守平臺規則與數據採集規範。
一、詳情接口原理與合規要點
京東商品詳情頁通過主接口加載基礎信息,配合多個輔助接口獲取規格、庫存、促銷等細分數據,采用 JSON 格式返回。實現該接口需遵循以下合規要點:
請求頻率控制:單 IP 對同一商品詳情請求間隔不低於 30 秒,單日單 IP 請求不超過 300 次
數據用途限制:僅用於個人學習研究、價格比較,不得用於商業競爭或惡意爬取
反爬機制尊重:不使用破解、偽造請求頭等手段,模擬正常用戶瀏覽行為
隱私保護:自動過濾任何可能涉及用戶隱私的信息,僅採集公開商品數據
商品詳情獲取的核心技術流程如下:
商品ID解析 → 主詳情接口請求 → 輔助接口數據補充 → 多源數據融合 → 結構化存儲


二、核心技術實現
1. 商品詳情參數生成器
京東商品詳情接口需要特定參數組合,包括商品 ID、動態簽名等,部分參數需實時生成:
運行
import time
import random
import hashlib
import string
class JdDetailParamsGenerator:
    """Build query-parameter dictionaries for the JD product-detail endpoints.

    Three parameter sets are produced: the signed main-detail request, the
    stock request and the price request. Every method returns a plain dict
    suitable for passing as ``params=`` to an HTTP client.
    """

    # Region code used whenever the caller does not supply one.
    DEFAULT_AREA = "1_72_2799_0"

    def __init__(self):
        self.app_key = "12574478"  # public application identifier
        self.platform = "h5"  # platform identifier

    def generate_main_params(self, sku_id, area=DEFAULT_AREA):
        """Return the signed parameter dict for the main detail request.

        :param sku_id: SKU identifier of the product.
        :param area: region code; defaults to ``DEFAULT_AREA``.
        :return: dict of request parameters including a ``sign`` entry.
        """
        # One timestamp is shared by "t" and the callback name so the two
        # values can never disagree.
        timestamp_ms = str(int(time.time() * 1000))
        params = {
            "skuId": sku_id,
            "cat": "",  # category id, intentionally left empty
            "area": area,
            "shopId": "",  # shop id, intentionally left empty
            "venderId": "",  # vendor id, intentionally left empty
            "paramJson": '{"platform":"' + self.platform + '"}',
            "t": timestamp_ms,
            "nonce": self._generate_nonce(16),
            "appkey": self.app_key,
            "callback": f"jsonp_{timestamp_ms}_{random.randint(1000, 9999)}",
        }
        # The signature covers every parameter assembled above.
        params["sign"] = self._generate_sign(params)
        return params

    def generate_stock_params(self, sku_id, area=DEFAULT_AREA):
        """Return the parameter dict for the stock request.

        :param sku_id: SKU identifier of the product.
        :param area: region code; defaults to ``DEFAULT_AREA``.
        """
        return {
            "skuId": sku_id,
            "area": area,
            "cat": "",
            "extraParam": '{"originid":"1"}',
            "callback": self._jquery_callback(),
        }

    def generate_price_params(self, sku_id, area=DEFAULT_AREA):
        """Return the parameter dict for the price request.

        :param sku_id: SKU identifier; sent as ``J_<sku_id>``.
        :param area: region code; defaults to ``DEFAULT_AREA``.
        """
        return {
            "skuIds": f"J_{sku_id}",
            "type": "1",
            "area": area,
            "callback": self._jquery_callback(),
        }

    def _jquery_callback(self):
        """Return a ``jQuery<7 digits>_<millisecond timestamp>`` callback name."""
        return f"jQuery{random.randint(1000000, 9999999)}_{int(time.time() * 1000)}"

    def _generate_nonce(self, length=16):
        """Return a random string of ASCII letters and digits of ``length`` chars."""
        alphabet = string.ascii_letters + string.digits
        return ''.join(random.choices(alphabet, k=length))

    def _generate_sign(self, params):
        """Return the upper-case MD5 hex digest used as the request signature.

        Parameters are sorted by name and joined as ``k=v`` pairs with ``&``;
        any existing ``sign`` entry is skipped, then a fixed demo secret is
        appended before hashing.
        """
        pairs = [f"{key}={value}" for key, value in sorted(params.items()) if key != "sign"]
        sign_str = "&".join(pairs)
        # Fixed key (example value only).
        sign_str += "&secret=jd_detail_demo_key"
        return hashlib.md5(sign_str.encode()).hexdigest().upper()
2. 詳情頁請求管理器
管理多個接口的請求發送,處理反爬機制和會話維護:
python
運行
import time
import random
import requests
from fake_useragent import UserAgent
class JdDetailRequester:
"""京東商品詳情請求管理器"""
def __init__(self, proxy_pool=None):
self.main_api = "https://h5api.m.jd.com/h5/mtop.taobao.detail.getdetail/6.0/"
self.stock_api = "https://c0.3.cn/stock"
self.price_api = "https://p.3.cn/prices/mgets"
self.proxy_pool = proxy_pool or []
self.ua = UserAgent()
self.session = requests.Session()
self.last_request_time = 0
self.min_interval = 30 # 同一商品請求最小間隔(秒)
# 初始化會話
self._init_session()
def _init_session(self):
"""初始化會話狀態"""
# 訪問首頁獲取基礎Cookie
self.session.get(
"https://www.jd.com",
headers=self._get_base_headers(),
timeout=10
)
# 設置基礎Cookie
self.session.cookies.set("ipLoc-djd", "1-72-2799-0", domain=".jd.com")
self.session.cookies.set("areaId", "1", domain=".jd.com")
def _get_base_headers(self):
"""基礎請求頭"""
return {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.9",
"Connection": "keep-alive"
}
def _get_headers(self, referer="https://www.jd.com/"):
"""生成詳情頁請求頭"""
headers = self._get_base_headers()
headers["Referer"] = referer
headers["X-Requested-With"] = "XMLHttpRequest"
return headers
def _get_proxy(self):
"""獲取隨機代理"""
if not self.proxy_pool:
return None
return random.choice(self.proxy_pool)
def _check_interval(self):
"""控制請求間隔"""
current_time = time.time()
elapsed = current_time - self.last_request_time
if elapsed < self.min_interval:
sleep_time = self.min_interval - elapsed + random.uniform(2, 5)
print(f"請求間隔不足,休眠 {sleep_time:.1f} 秒")
time.sleep(sleep_time)
self.last_request_time = time.time()
def fetch_main_detail(self, params):
"""獲取主詳情數據"""
self._check_interval()
headers = self._get_headers()
proxy = self._get_proxy()
proxies = {"http": proxy, "https": proxy} if proxy else None
try:
response = self.session.get(
self.main_api,
params=params,
headers=headers,
proxies=proxies,
timeout=15
)
if response.status_code != 200:
print(f"主詳情請求失敗,狀態碼: {response.status_code}")
return None
if self._is_blocked(response.text):
print("主詳情請求被攔截")
self._handle_blocked(proxy)
return None
return response.text
except Exception as e:
print(f"主詳情請求異常: {str(e)}")
return None
def fetch_stock(self, params):
"""獲取庫存數據"""
headers = self._get_headers(f"https://item.jd.com/{params['skuId']}.html")
proxy = self._get_proxy()
proxies = {"http": proxy, "https": proxy} if proxy else None
try:
response = self.session.get(
self.stock_api,
params=params,
headers=headers,
proxies=proxies,
timeout=15
)
if response.status_code != 200:
print(f"庫存請求失敗,狀態碼: {response.status_code}")
return None
return response.text
except Exception as e:
print(f"庫存請求異常: {str(e)}")
return None
def fetch_price(self, params):
"""獲取價格數據"""
headers = self._get_headers(f"https://item.jd.com/{params['skuIds'].split('_')[1]}.html")
proxy = self._get_proxy()
proxies = {"http": proxy, "https": proxy} if proxy else None
try:
response = self.session.get(
self.price_api,
params=params,
headers=headers,
proxies=proxies,
timeout=15
)
if response.status_code != 200:
print(f"價格請求失敗,狀態碼: {response.status_code}")
return None
return response.text
except Exception as e:
print(f"價格請求異常: {str(e)}")
return None
def _is_blocked(self, response_text):
"""判斷是否被反爬攔截"""
block_keywords = [
"驗證碼",
"訪問過于頻繁",
"安全驗證",
"請稍后再試"
]
for keyword in block_keywords:
if keyword in response_text:
return True
return False
def _handle_blocked(self, proxy):
"""處理被攔截情況"""
if proxy and proxy in self.proxy_pool:
self.proxy_pool.remove(proxy)
# 重新初始化會話
self._init_session()
# 延遲一段時間
time.sleep(random.uniform(10, 20))
3. 商品詳情數據解析器
解析多個接口返回的數據,提取結構化商品信息:
python
運行
import re
import json
from datetime import datetime
from lxml import etree
class JdDetailParser:
    """Turn the raw JSONP responses of the detail endpoints into plain dicts."""

    def __init__(self):
        # JSONP wrappers: ``jsonp_<digits>_<digits>(...)`` and
        # ``jQuery<digits>_<digits>(...);``. The payload match is greedy with
        # DOTALL so that a ")" or a newline inside the JSON body does not
        # truncate the capture.
        self.jsonp_pattern = re.compile(r'jsonp_\d+_\d+\((.*)\)', re.S)
        self.jquery_pattern = re.compile(r'jQuery\d+_\d+\((.*)\);?', re.S)

    def parse_main_detail(self, jsonp_text):
        """Parse the main detail response.

        :param jsonp_text: raw ``jsonp_..._...(...)`` response text.
        :return: dict of product fields, or ``None`` when the wrapper is
            missing, the JSON is invalid or the status is not success.
        """
        match = self.jsonp_pattern.search(jsonp_text)
        if not match:
            return None
        try:
            json_data = json.loads(match.group(1))
        except json.JSONDecodeError:
            print("主詳情JSON解析失敗")
            return None
        if not isinstance(json_data, dict):
            return None

        # An empty or missing "ret" list counts as failure.
        ret = json_data.get("ret") or [""]
        if ret[0] != "SUCCESS::調用成功":
            return None

        data = json_data.get("data") or {}
        base = data.get("base") or {}
        brand = base.get("brand") or {}
        shop = base.get("shopInfo") or {}
        images = data.get("images") or {}
        item_desc = data.get("itemDesc") or {}

        result = {}
        # Basic information.
        result["product_id"] = base.get("skuId", "")
        result["name"] = (base.get("name") or "").strip()
        result["brand"] = brand.get("name", "")
        result["brand_id"] = brand.get("id", "")
        result["shop_name"] = shop.get("name", "")
        result["shop_id"] = shop.get("shopId", "")
        result["is_self"] = shop.get("isSelf", False)
        # Category names, skipping unnamed entries.
        result["categories"] = [
            c.get("name", "") for c in (data.get("category") or []) if c.get("name")
        ]
        # Product images and video.
        result["main_images"] = [img.get("url", "") for img in (images.get("imgList") or [])]
        result["video_url"] = (images.get("videoInfo") or {}).get("url", "")
        # Key attributes and description images.
        result["params"] = self._parse_params(item_desc.get("keyAttributes") or [])
        result["description"] = self._parse_description(item_desc.get("detail", ""))
        return result

    def parse_stock(self, jquery_text):
        """Parse the stock response.

        :param jquery_text: raw ``jQuery..._...(...)`` response text.
        :return: dict with ``has_stock``, ``stock_num``, ``limit_buy`` and
            ``warehouse``, or ``None`` when the payload cannot be extracted.
        """
        match = self.jquery_pattern.search(jquery_text)
        if not match:
            return None
        try:
            json_data = json.loads(match.group(1))
        except json.JSONDecodeError:
            print("庫存JSON解析失敗")
            return None
        if not isinstance(json_data, dict):
            return None
        stock_info = json_data.get("stock") or {}
        return {
            "has_stock": stock_info.get("hasStock", False),
            "stock_num": stock_info.get("stockNum", 0),
            "limit_buy": stock_info.get("limitBuy", 0),
            "warehouse": stock_info.get("warehouse", ""),
        }

    def parse_price(self, jquery_text):
        """Parse the price response (JSONP-wrapped or bare JSON list).

        :return: dict with ``price``, ``original_price``, ``currency`` and
            ``update_time``, or ``None`` when no usable price entry is found.
        """
        match = self.jquery_pattern.search(jquery_text)
        if match:
            try:
                json_data = json.loads(match.group(1))
            except json.JSONDecodeError:
                print("價格JSON解析失敗")
                return None
        else:
            # No wrapper found: try the text as plain JSON.
            try:
                json_data = json.loads(jquery_text)
            except (TypeError, ValueError):
                return None

        if not isinstance(json_data, list) or not json_data:
            return None
        price_info = json_data[0]
        if not isinstance(price_info, dict):
            return None
        return {
            "price": self._to_float(price_info.get("p")),
            "original_price": self._to_float(price_info.get("m")),
            "currency": "CNY",
            "update_time": datetime.now(),
        }

    def _to_float(self, value, default=0.0):
        """Convert ``value`` to float; empty or non-numeric values give ``default``."""
        if not value:
            return default
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _parse_params(self, key_attributes):
        """Return ``{name: value}`` for attributes where both are non-empty."""
        params = {}
        for attr in key_attributes:
            name = (attr.get("name") or "").strip()
            value = (attr.get("value") or "").strip()
            if name and value:
                params[name] = value
        return params

    def _parse_description(self, detail_html):
        """Return the absolute image URLs found in the description HTML.

        Protocol-relative and root-relative ``src`` values are expanded;
        other relative forms are skipped.
        """
        if not detail_html:
            return []
        tree = etree.HTML(detail_html)
        # Guard against the parser yielding no document for this input.
        if tree is None:
            return []
        images = []
        for src in tree.xpath('//img/@src'):
            if src.startswith(('http:', 'https:')):
                images.append(src)
            elif src.startswith('//'):
                images.append(f"https:{src}")
            elif src.startswith('/'):
                images.append(f"https://item.jd.com{src}")
        return images

    def merge_details(self, main_detail, stock, price):
        """Merge stock and price into a copy of ``main_detail``.

        :return: merged dict with ``stock``, ``price`` and ``crawl_time``
            keys, or ``None`` when ``main_detail`` is empty.
        """
        if not main_detail:
            return None
        merged = main_detail.copy()
        merged["stock"] = stock if stock else {}
        merged["price"] = price if price else {}
        merged["crawl_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return merged
4. 商品規格處理器
解析商品規格參數和可選配置:
python
運行
import re
import json
class JdSkuSpecificationProcessor:
    """Extract specification groups and the SKU mapping from an item page."""

    # Locates the start of the embedded ``var specData = {...}`` assignment.
    _SPEC_MARKER = re.compile(r'var\s+specData\s*=\s*')

    def __init__(self):
        pass

    def parse_specifications(self, html_content):
        """Parse the ``specData`` object embedded in the page HTML.

        The JSON value is decoded in place starting right after the
        assignment. This is needed because ``specPath`` values contain ";",
        so cutting the text at the first ";" would truncate the payload.

        :param html_content: item page HTML.
        :return: dict with ``spec_groups`` and ``sku_mapping``, or ``None``
            when the marker is absent or parsing fails.
        """
        try:
            marker = self._SPEC_MARKER.search(html_content)
            if not marker:
                return None
            spec_data, _ = json.JSONDecoder().raw_decode(html_content, marker.end())
            return self._process_spec_data(spec_data)
        except Exception as e:
            print(f"規格解析失敗: {str(e)}")
            return None

    def _process_spec_data(self, spec_data):
        """Convert the raw ``specData`` dict into groups and a SKU mapping."""
        result = {
            "spec_groups": [],  # specification groups
            "sku_mapping": {},  # sku id -> spec path / price / stock / image
        }

        for group in spec_data.get("specList", []):
            items = [
                {
                    "name": item.get("name", ""),
                    "img_url": item.get("imgUrl", ""),
                    "selected": item.get("isSelected", False),
                    "disabled": item.get("disabled", False),
                }
                for item in group.get("specItemList", [])
            ]
            # Groups without any item are dropped.
            if items:
                result["spec_groups"].append({"name": group.get("name", ""), "items": items})

        for sku in spec_data.get("skuList", []):
            sku_id = sku.get("skuId", "")
            if not sku_id:
                continue
            # Each "key:value" segment contributes its value; segments
            # without ":" are ignored.
            spec_path = [
                segment.split(":", 1)[1]
                for segment in (sku.get("specPath") or "").split(";")
                if ":" in segment
            ]
            result["sku_mapping"][sku_id] = {
                "spec_path": spec_path,
                "price": sku.get("price", ""),
                "stock": sku.get("stock", 0),
                "img_url": sku.get("imgUrl", ""),
            }
        return result

    def get_sku_by_spec(self, sku_mapping, spec_combination):
        """Return the first SKU whose spec path equals ``spec_combination``.

        :return: dict with ``sku_id``, ``price``, ``stock`` and ``img_url``,
            or ``None`` when nothing matches or either argument is empty.
        """
        if not sku_mapping or not spec_combination:
            return None
        for sku_id, sku_info in sku_mapping.items():
            if self._match_spec(sku_info["spec_path"], spec_combination):
                return {
                    "sku_id": sku_id,
                    "price": sku_info["price"],
                    "stock": sku_info["stock"],
                    "img_url": sku_info["img_url"],
                }
        return None

    def _match_spec(self, sku_specs, target_specs):
        """Return True when both sequences have equal length and equal items."""
        if len(sku_specs) != len(target_specs):
            return False
        return all(left == right for left, right in zip(sku_specs, target_specs))
三、完整商品詳情服務封裝
整合上述組件,實現完整的商品詳情獲取服務:
python
運行
class JdProductDetailService:
    """Fetch, parse and merge all detail data for one product URL."""

    def __init__(self, proxy_pool=None):
        """Wire up the collaborating components.

        :param proxy_pool: optional list of proxy URLs handed to the requester.
        """
        # JdSkuIdParser is not defined in this section; the original note
        # says it reuses a SKU parser implemented earlier.
        self.sku_parser = JdSkuIdParser()
        self.params_generator = JdDetailParamsGenerator()
        self.requester = JdDetailRequester(proxy_pool=proxy_pool)
        self.parser = JdDetailParser()
        self.spec_processor = JdSkuSpecificationProcessor()

    def get_product_detail(self, product_url):
        """Return the complete detail dict for a product page.

        The main detail is mandatory. Stock, price and specifications are
        optional extras: a failure in any of them leaves that part empty
        instead of discarding the data already collected.

        :param product_url: product detail page URL.
        :return: merged detail dict, or ``None`` when the SKU id or the main
            detail cannot be obtained.
        """
        # 1. Resolve the SKU id.
        print("解析商品SKU ID...")
        sku_id = self.sku_parser.get_sku_id(product_url)
        if not sku_id:
            print("無法獲取商品SKU ID")
            return None
        print(f"商品SKU ID: {sku_id}")

        # 2. Main detail (mandatory).
        print("獲取主詳情數據...")
        main_params = self.params_generator.generate_main_params(sku_id)
        main_response = self.requester.fetch_main_detail(main_params)
        if not main_response:
            print("主詳情數據獲取失敗")
            return None
        main_detail = self.parser.parse_main_detail(main_response)
        if not main_detail:
            print("主詳情數據解析失敗")
            return None

        # 3. Stock (optional).
        print("獲取庫存數據...")
        stock_params = self.params_generator.generate_stock_params(sku_id)
        stock_response = self.requester.fetch_stock(stock_params)
        stock = self.parser.parse_stock(stock_response) if stock_response else None

        # 4. Price (optional).
        print("獲取價格數據...")
        price_params = self.params_generator.generate_price_params(sku_id)
        price_response = self.requester.fetch_price(price_params)
        price = self.parser.parse_price(price_response) if price_response else None

        # 5. Specifications (optional), taken from the item page HTML.
        print("獲取規格數據...")
        specifications = self._fetch_specifications(product_url)

        # 6. Merge everything.
        full_detail = self.parser.merge_details(main_detail, stock, price)
        if full_detail and specifications:
            full_detail["specifications"] = specifications
        print("商品詳情獲取完成")
        return full_detail

    def _fetch_specifications(self, product_url):
        """Download the item page and parse its specifications; None on failure."""
        try:
            html_response = self.requester.session.get(
                product_url,
                headers=self.requester._get_headers(),
                timeout=15
            )
        except Exception as e:
            # A network error here must not throw away the main detail.
            print(f"規格數據獲取失敗: {str(e)}")
            return None
        return self.spec_processor.parse_specifications(html_response.text)
四、使用示例與數據存儲
1. 基本使用示例
python
運行
def main():
    """Fetch one product's details and print a short summary."""
    # Proxy pool (replace with working proxies for real use), e.g.
    #   "http://123.123.123.123:8080",
    #   "http://111.111.111.111:8888"
    proxy_pool = []

    detail_service = JdProductDetailService(proxy_pool=proxy_pool)

    # Replace with a real product URL.
    product_url = "https://item.jd.com/100012345678.html"

    product_detail = detail_service.get_product_detail(product_url)
    if not product_detail:
        print("商品詳情獲取失敗")
        return

    # The leading "\n" separates the summary from the progress output.
    print(f"\n商品名稱: {product_detail['name']}")
    print(f"價格: {product_detail['price'].get('price', 0)}元")
    print(f"是否有貨: {'有貨' if product_detail['stock'].get('has_stock', False) else '無貨'}")
    print(f"店鋪: {product_detail['shop_name']} {'(自營)' if product_detail['is_self'] else ''}")
    print(f"品牌: {product_detail['brand']}")

    # Show at most two specification groups, five items each.
    if "specifications" in product_detail and product_detail["specifications"]["spec_groups"]:
        print("\n商品規格:")
        for group in product_detail["specifications"]["spec_groups"][:2]:
            item_names = ', '.join(item['name'] for item in group['items'][:5])
            print(f"- {group['name']}: {item_names}")

    # Show at most five key parameters.
    if product_detail["params"]:
        print("\n主要參數:")
        for key, value in list(product_detail["params"].items())[:5]:
            print(f"- {key}: {value}")


if __name__ == "__main__":
    main()
2. 詳情數據存儲工具
python
運行
import json
import csv
import pandas as pd
from pathlib import Path
from datetime import datetime
class JdDetailStorage:
    """Persist product detail dicts as JSON and CSV files in one directory."""

    def __init__(self, storage_dir="./jd_product_details"):
        """Create the storage directory (including parents) if it is missing."""
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(exist_ok=True, parents=True)

    def _build_path(self, prefix, product_detail, suffix):
        """Return ``<storage_dir>/<prefix>_<sku>_<timestamp><suffix>``.

        The SKU id is reduced to alphanumerics, "-" and "_" so that an id
        containing path separators cannot place the file outside the
        storage directory. A missing or empty id becomes ``unknown``.
        """
        raw_id = str(product_detail.get("product_id") or "unknown")
        safe_id = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in raw_id)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return self.storage_dir / f"{prefix}_{safe_id}_{timestamp}{suffix}"

    def save_to_json(self, product_detail):
        """Write the complete detail dict as JSON and return the file path.

        Values that JSON cannot encode natively are written via ``str``.
        """
        file_path = self._build_path("jd_detail", product_detail, ".json")
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(product_detail, f, ensure_ascii=False, indent=2, default=str)
        print(f"完整詳情已保存至JSON: {file_path}")
        return file_path

    def save_to_csv(self, product_detail):
        """Write a one-row CSV with the basic fields and return the file path.

        A ``price`` or ``stock`` entry that is missing or ``None`` is treated
        as empty, so the corresponding columns fall back to their defaults.
        """
        file_path = self._build_path("jd_detail_basic", product_detail, ".csv")
        price = product_detail.get("price") or {}
        stock = product_detail.get("stock") or {}
        basic_info = {
            "product_id": product_detail.get("product_id", ""),
            "name": product_detail.get("name", ""),
            "brand": product_detail.get("brand", ""),
            "price": price.get("price", 0),
            "original_price": price.get("original_price", 0),
            "shop_name": product_detail.get("shop_name", ""),
            "shop_id": product_detail.get("shop_id", ""),
            "is_self": product_detail.get("is_self", False),
            "has_stock": stock.get("has_stock", False),
            "stock_num": stock.get("stock_num", 0),
            "categories": "/".join(product_detail.get("categories") or []),
            "crawl_time": product_detail.get("crawl_time", ""),
        }
        # utf-8-sig adds a BOM to the written file.
        pd.DataFrame([basic_info]).to_csv(file_path, index=False, encoding="utf-8-sig")
        print(f"基礎信息已保存至CSV: {file_path}")
        return file_path

    def save_specifications(self, product_detail):
        """Write only the ``specifications`` entry as JSON.

        :return: the file path, or ``None`` when the detail has no
            ``specifications`` key.
        """
        if "specifications" not in product_detail:
            print("無規格數據可保存")
            return None
        file_path = self._build_path("jd_specs", product_detail, ".json")
        with open(file_path, "w", encoding="utf-8") as f:
            # default=str matches save_to_json, so the same values serialise
            # the same way in both files.
            json.dump(product_detail["specifications"], f, ensure_ascii=False, indent=2, default=str)
        print(f"規格數據已保存: {file_path}")
        return file_path
五、合規優化與風險提示
1. 系統優化策略
多級緩存機制:實現內存緩存 + 文件緩存的多級緩存策略
python
運行
def get_cached_detail(self, sku_id, max_age=3600):
    """Look up a product detail in the cache (placeholder).

    The intended lookup order is the in-memory cache, then the file cache,
    with an expiry policy applied. None of that is implemented yet, so the
    arguments are ignored and every call reports a cache miss.

    :param sku_id: SKU identifier to look up.
    :param max_age: cache-age limit; currently unused.
    :return: always ``None``.
    """
    cached_detail = None
    return cached_detail
智能請求調度:根據商品重要程度和更新頻率,動態調整抓取頻率
異常重試機制:實現指數退避重試策略,提高成功率
2. 合規與風險提示
商業應用前必須獲得京東平臺書面授權,遵守《電子商務法》相關規定
不得將採集的商品數據用於生成與京東競爭的產品或服務
嚴格控制請求頻率,避免對平臺服務器造成負擔
當檢測到反爬機制加強時,應立即降低請求頻率或暫停服務
尊重商品信息版權,不濫用採集的數據
通過本文提供的技術方案,可構建一套功能完善的京東商品詳情接口系統。該方案實現了從多接口數據採集、解析到融合的全流程處理,支持商品基礎信息、價格、庫存和規格等多維度數據的獲取,為電商數據分析、比價系統等場景提供技術支持。在實際應用中,需根據平臺規則動態調整策略,確保系統的穩定性和合法性。
審核編輯 黃宇
-
API
+關注
關注
2文章
2376瀏覽量
66806 -
京東
+關注
關注
2文章
1108瀏覽量
50087
發布評論請先 登錄
京東商品詳情API接口詳解:獲取商品標題、價格、庫存等核心數據
閑魚商品詳情 API 接口文檔
標題:技術實戰 | 如何通過API接口高效獲取亞馬遜平臺商品詳情數據
淘寶商品詳情API接口技術解析與實戰應用
京東商品詳情接口實戰解析:從調用優化到商業價值挖掘(附避坑代碼)
京東商品詳情商品詳情接口技術實現:從數據抓取到結構化解析全方案
評論