Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
25 kB
4
Indexable
Never
import json
import time
from slugify import slugify
import threading
import random
import asyncio
import math
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import requests
# Proxy settings substituted into the Chrome proxy-auth extension template
# built by getmb(). They start empty and are overwritten by get_proxy_sele().
PROXY_HOST = ''  # rotating proxy or host
PROXY_PORT = ''# port
PROXY_USER = '' # username
PROXY_PASS = '' # password
# Text of the generated extension files (manifest.json / background.js);
# filled in by getmb() and zipped by get_chromedriver()/get_chromedriver2().
manifest_json=""
background_js =""
import pandas as pd
import requests
import pymysql
import os
import zipfile
# Path to the Excel workbook that is loaded into `df` below.
excel_file = './test.xlsx'

import aiohttp
import asyncio
import json

# Module-level MySQL connection used by save_db(); it is opened as soon as the
# module is imported.
# NOTE(review): host, user and password are hard-coded in source — consider
# loading them from environment variables or an untracked config file.
connection_data=pymysql.connect(host="103.110.84.6", user="pdjtvluc_tuan", passwd="lazadasp1", db="pdjtvluc_lazadadb",port=3306)

async def save_db(item,category_sp):
    """Insert one scraped product into the ``product`` table.

    Args:
        item: Product dict as produced by ``info_item`` (reads the keys
            ``itemId``, ``name``, ``discount``, ``price``, ``originalPrice``,
            ``Shopmall``, ``itemSoldCntShow``, ``image``, ``images``,
            ``review``, ``itemUrl``, ``ratingScore`` and ``location``).
        category_sp: Value stored in the ``category`` column.

    Raises:
        KeyError, TypeError, ValueError: if ``item`` lacks a key or a numeric
            field cannot be converted; nothing is written in that case.
        pymysql.MySQLError: if the insert fails; the transaction is rolled
            back before the error is re-raised.
    """
    is_load = 0
    stock = 0

    # Convert every field up front so a malformed item fails before the
    # database is touched.
    val = (
        int(item['itemId']), str(item['name']), float(item['discount']),
        float(item['price']),
        float(item['price']), float(item['originalPrice']),
        str(item['Shopmall']),
        float(item['itemSoldCntShow']), str(item['image']),
        str(item['images']), str(item['review']),
        str(item['itemUrl']), float(item['ratingScore']),
        str(item['location']),
        stock, category_sp, is_load,
    )
    sql = """
                INSERT INTO product(product_id,name ,discount,price,price_min,price_max,typer_shop,historical_sold,image,images,liked_count,
                link,rating,shop_location,stock,category,is_load
                ) VALUE(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                """

    conn = connection_data
    # Re-open the shared connection if the server dropped it. The connection
    # is deliberately NOT closed afterwards: it is a module-level object
    # reused by every call, and closing it forced a full reconnect per row.
    conn.ping(reconnect=True)
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, val)
        conn.commit()
    except Exception:
        # Do not leave a half-finished transaction on the shared connection.
        conn.rollback()
        raise
    
    
    
# Read the Excel workbook into a module-level DataFrame. get_data() and a()
# write status columns into it and save it back to './test.xlsx'.
df = pd.read_excel(excel_file)


# NOTE(review): this module-level `path` does not appear to be used by the
# code visible here — get_chromedriver()/get_chromedriver2() compute their own
# local `path` from __file__. Confirm before removing.
path =  "C:/Users/Admin/Desktop/lazada/backend/tool/chromedriver.exe"
async def fetch(url):
    """GET *url* with a throwaway aiohttp session and return the body text."""
    async with aiohttp.ClientSession() as http, http.get(url) as reply:
        body = await reply.text()
    return body
async def get_proxy_sele():
    """Request a fresh proxy from the tinproxy API and publish it globally.

    The location is picked at random between ``vn_dn`` and ``vn_hcm``. The
    call is retried until the API returns a usable ``host:port`` and
    credentials. After two consecutive failures the coroutine sleeps 60
    seconds before each further attempt, and the failure counter wraps back
    to zero once it reaches four.

    Side effects:
        Overwrites the module globals ``PROXY_HOST``, ``PROXY_PORT``,
        ``PROXY_USER`` and ``PROXY_PASS``.

    Returns:
        dict with the keys ``PROXY_HOST``, ``PROXY_PORT``, ``PROXY_USER``
        and ``PROXY_PASS``.
    """
    global PROXY_HOST
    global PROXY_PORT
    global PROXY_USER
    global PROXY_PASS

    location = random.choice(["vn_dn", "vn_hcm"])
    # NOTE(review): the API key and whitelisted IP are hard-coded here.
    url = "https://api.tinproxy.com/proxy/get-new-proxy?authen_ips=14.191.157.215&location={}&api_key=70QpHJ5sKS3i7kgdJtuKrdh9M0BsHXbK".format(location)

    failures = 0
    while True:
        if failures >= 2:
            print("dang ngu dong")
            # asyncio.sleep, not time.sleep: a blocking sleep would freeze
            # the whole event loop for a minute.
            await asyncio.sleep(60)
            failures += 1
            if failures >= 4:
                failures = 0
        try:
            data = json.loads(await fetch(url))
            server_host = data['data']['http_ipv6_ipv4']
            auth = data['data']['authentication']
            user_name = auth['username']
            pass_word = auth['password']
            if not server_host:
                raise ValueError("proxy API returned an empty host")
            ip, port = server_host.split(":")
        except Exception as exc:
            # `except Exception` (not a bare except) so task cancellation and
            # Ctrl-C can still break out of this retry loop.
            print("get proxy failed:", exc)
            failures += 1
            continue

        PROXY_HOST = ip
        PROXY_PORT = port
        PROXY_USER = user_name
        PROXY_PASS = pass_word
        my_proxies2 = {
            "PROXY_HOST": ip,
            "PROXY_PORT": port,
            "PROXY_USER": user_name,
            "PROXY_PASS": pass_word,
        }
        print("get proxy cho selenium",my_proxies2)
        return my_proxies2




async def getmb():
    """Fetch a new proxy and rebuild the Chrome proxy-auth extension sources.

    Calls ``get_proxy_sele()`` (which updates the ``PROXY_*`` globals) and
    then regenerates the module globals ``manifest_json`` and
    ``background_js`` with those values substituted in. The driver factories
    zip these two strings into ``proxy_auth_plugin.zip``.
    """
    global manifest_json
    global background_js

    print("lay proxy")
    await get_proxy_sele()
    # Non-blocking pause; time.sleep() here would stall the event loop.
    await asyncio.sleep(1)

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # The four %s placeholders are, in order: host, port, username, password.
    background_js = """
    var config = {
            mode: "fixed_servers",
            rules: {
            singleProxy: {
                scheme: "http",
                host: "%s",
                port: parseInt(%s)
            },
            bypassList: ["localhost"]
            }
        };
    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
    function callbackFn(details) {
        return {
            authCredentials: {
                username: "%s",
                password: "%s"
            }
        };
    }
    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """ % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)
    
    
def get_chromedriver2(use_proxy=True, user_agent=None):
    """Build a Chrome driver after discarding any stale proxy plugin archive.

    Apart from deleting an existing ``proxy_auth_plugin.zip`` first, this is
    the same as ``get_chromedriver``, so it delegates to that function
    instead of carrying a copy of its body.

    Args:
        use_proxy: Forwarded to ``get_chromedriver``.
        user_agent: Forwarded to ``get_chromedriver``.

    Returns:
        The driver object returned by ``get_chromedriver``.
    """
    pluginfile = 'proxy_auth_plugin.zip'

    # Remove a plugin archive left over from a previous proxy so the new
    # driver cannot pick up outdated credentials.
    if os.path.exists(pluginfile):
        os.remove(pluginfile)
        print("Đã xóa tệp", pluginfile)

    return get_chromedriver(use_proxy=use_proxy, user_agent=user_agent)
def get_chromedriver(use_proxy=True, user_agent=None):
    """Create a Chrome WebDriver, optionally routed through the current proxy.

    Args:
        use_proxy: When true, zip the module globals ``manifest_json`` and
            ``background_js`` into ``proxy_auth_plugin.zip`` (in the current
            working directory) and load it as a Chrome extension.
        user_agent: Optional user-agent string. When given, the automation
            switches are also excluded.

    Returns:
        The started ``webdriver.Chrome`` instance, with
        ``navigator.webdriver`` redefined to return ``undefined`` on the
        initial page.
    """
    path = os.path.dirname(os.path.abspath(__file__))
    chrome_options = webdriver.ChromeOptions()
    time.sleep(0.2)

    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'
        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)

    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)
        chrome_options.add_experimental_option('useAutomationExtension', False)
        # Experimental options are stored per name, so "excludeSwitches" must
        # be set once with the full list; setting it twice silently dropped
        # "enable-automation".
        chrome_options.add_experimental_option(
            "excludeSwitches",
            [
                "enable-automation",
                "ignore-certificate-errors",
                "safebrowsing-disable-download-protection",
                "safebrowsing-disable-auto-update",
                "disable-client-side-phishing-detection",
            ],
        )

    # NOTE(review): the positional driver path and the `chrome_options=`
    # keyword are the legacy constructor form; newer Selenium 4 releases
    # expect `service=Service(path)` and `options=` — confirm the installed
    # Selenium version before upgrading.
    driver = webdriver.Chrome(
        os.path.join(path, 'chromedriver'),
        chrome_options=chrome_options)
    driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    return driver



async def get_cookie(url2):
    """Load *url2* in a real browser behind a fresh proxy and return its body.

    Each attempt obtains a new proxy via ``getmb()``, starts Chrome, opens
    the Lazada phone category page, opens *url2* in a second tab and reads
    the text of the ``<pre>`` element that Chrome renders for a raw JSON
    response. A failed attempt is retried with a new proxy and a new
    browser; there is no upper bound on the number of attempts.

    Args:
        url2: URL of the AJAX endpoint whose response body is wanted.

    Returns:
        str: text content of ``/html/body/pre`` in the tab showing *url2*.
    """
    landing_url = "https://www.lazada.vn/dien-thoai-di-dong/"
    first_attempt = True

    while True:
        await getmb()
        # Later attempts use get_chromedriver2, which deletes the plugin
        # archive written for the previous proxy before building a new one.
        if first_attempt:
            driver = get_chromedriver(use_proxy=True)
        else:
            driver = get_chromedriver2(use_proxy=True)
        first_attempt = False

        try:
            driver.set_window_rect(100, 200, 375, 812)
            driver.get(landing_url)
            # Pass the URL as a script argument rather than splicing it into
            # the JavaScript source, so quotes in the URL cannot break it.
            driver.execute_script("window.open(arguments[0], '_blank')", url2)
            # Switch to the newly opened tab BEFORE looking for <pre>;
            # otherwise the lookup runs against the landing page.
            driver.switch_to.window(driver.window_handles[-1])
            return driver.find_element("xpath", "/html/body/pre").text
        except Exception as exc:
            print("get_cookie failed, retrying with a new proxy:", exc)
        finally:
            # Always shut the browser down (all tabs), on success and failure
            # alike, so Chrome processes do not pile up.
            driver.quit()

        await asyncio.sleep(1)
    


def get_new_proxy():
    """Request a proxy from the tinproxy API, formatted for ``requests``.

    The request is repeated every 30 seconds until the response contains a
    host and credentials. Network errors and malformed responses are treated
    like any other failed attempt.

    Returns:
        dict: ``{"http": "http://<user>:<password>@<host:port>"}``.
    """
    # NOTE(review): only the "http" scheme is mapped, so `requests` will not
    # route https:// URLs through this proxy — confirm whether an "https"
    # entry is wanted. The API key is also hard-coded here.
    url = "https://api.tinproxy.com/proxy/get-new-proxy?authen_ips=103.107.183.30&location=vn_hcm&api_key=70QpHJ5sKS3i7kgdJtuKrdh9M0BsHXbK"

    while True:
        try:
            # The timeout keeps a dead API endpoint from hanging the crawler.
            r = requests.get(url, timeout=30)
            data = json.loads(r.text)
            server_host = data['data']['http_ipv6_ipv4']
            user_name = data['data']['authentication']['username']
            pass_word = data['data']['authentication']['password']
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            print("get_new_proxy failed, retrying in 30s:", exc)
            time.sleep(30)
            continue

        my_proxies = {
            "http": "http://{}:{}@{}".format(user_name, pass_word, server_host),
        }
        print(my_proxies)
        return my_proxies



def _parse_sold_count(raw):
    """Turn a sold-count label such as "1.2k sold" or "500+ Đã bán" into a float."""
    if not isinstance(raw, str):
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    text = raw.lower()
    # Cut the label at the first unit marker / plus sign, keeping the number.
    for marker in ("sold", "đã bán", "+"):
        pos = text.find(marker)
        if pos != -1:
            text = text[:pos]
    text = text.strip()

    # A trailing "k" means thousands; without scaling, "1.2k" would rank
    # below "500" when callers sort by this value.
    multiplier = 1
    k_pos = text.find("k")
    if k_pos != -1:
        text = text[:k_pos]
        multiplier = 1000

    # A decimal comma is normalised to a dot, as before.
    text = text.strip().replace(",", ".")
    try:
        return float(text) * multiplier
    except ValueError:
        return 0.0


def _to_float(value, default=0.0):
    """Return ``float(value)``, or *default* when the value is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def info_item(data,list_item,key1,key2,key3):
    """Append the products of one listing page that match the keywords.

    A product matches when its lower-cased name contains ``key1`` and
    ``key2`` and, if given, ``key3``. ``key3`` is treated as "not given"
    when it is the placeholder ``"x"`` (any case) or not a string (for
    example an empty spreadsheet cell).

    Args:
        data: Decoded JSON response; products are read from
            ``data['mods']['listItems']``.
        list_item: List extended in place with one dict per matching product.
        key1: First required keyword.
        key2: Second required keyword.
        key3: Optional third keyword, or ``"x"`` for none.

    Raises:
        KeyError, TypeError: if ``data`` does not have the expected
            ``mods``/``listItems`` structure, or a matching product lacks one
            of its identifying fields. Callers use this to detect an invalid
            response.
    """
    items = data['mods']['listItems']

    keywords = [key1.lower(), key2.lower()]
    if isinstance(key3, str) and key3.strip().lower() != "x":
        keywords.append(key3.lower())

    for item in items:
        name = item['name']
        name_new = name.lower()
        if not all(word in name_new for word in keywords):
            continue

        itemId = item['itemId']

        # The discount label looks like "-25%"; keep the part before the
        # percent sign. Missing or non-text values become 0.
        raw_discount = item.get('discount')
        discount = raw_discount.split("%")[0] if isinstance(raw_discount, str) else 0

        try:
            Shopmall = item['icons'][0]['alias']
        except (KeyError, IndexError, TypeError):
            Shopmall = ""

        # Collect thumbnail URLs; stop at the first malformed entry but keep
        # what was gathered so far.
        images = []
        try:
            for thumb in item.get('thumbs') or []:
                images.append(thumb['image'])
        except (KeyError, TypeError):
            pass

        # Every optional field gets its own per-item default, so a value can
        # never carry over from the previous product.
        list_item.append({
            "key": itemId,
            "itemId": itemId,
            "brandId": item['brandId'],
            "brandName": item['brandName'],
            "discount": discount,
            "image": item['image'],
            "itemSoldCntShow": _parse_sold_count(item.get('itemSoldCntShow')),
            "itemUrl": item['itemUrl'],
            "location": item['location'],
            "name": name,
            "nid": item['nid'],
            # price max
            "originalPrice": item.get('originalPrice', 0),
            "originalPriceShow": item.get('originalPriceShow', ""),
            # price min
            "price": _to_float(item.get('price')),
            "priceShow": item.get('priceShow', 0),
            "ratingScore": item.get('ratingScore', 0),
            "review": item.get('review', ""),
            "sellerId": item['sellerId'],
            "Shopmall": Shopmall,
            "images": images,
        })

async def get_data(url,list_data,key1,key2,key3,index):
    """Fetch one listing page and append its matching products to *list_data*.

    The page is first requested directly with a fixed browser cookie. If
    that request fails or the response cannot be parsed by ``info_item``,
    the page is loaded through a proxied browser via ``get_cookie`` up to
    three times. When every attempt fails, the spreadsheet row is marked in
    the ``chan`` column and the workbook is saved.

    Args:
        url: AJAX listing URL to fetch.
        list_data: List extended in place with product dicts.
        key1, key2, key3: Keyword filters forwarded to ``info_item``.
        index: Row label in the module-level ``df`` that this search belongs to.
    """
    # NOTE(review): a captured browser session cookie and CSRF token are
    # hard-coded below; they presumably expire and have to be refreshed by hand.
    headers = {
        # Two adjacent literals: the original single literal contained a raw
        # line break, which is not valid inside a quoted Python string.
        "cookie": (
            "__wpkreporterwid_=813ed48b-595c-4adf-9f22-24f1b8328e2a; t_fv=1670425558316; t_uid=ZfqbNVCVcuji2pNujFDgD7X8IpYsozLZ; cna=1pkXHJ5Pty8CAXRvHRQmtaHQ; lzd_cid=96503b91-03f6-4a84-a33d-a69b147980af; miidlaz=miidgg5rbu1gmghens6gt8p; lwrid=AQGHIpP%2FCbpv79mAoewzX39uI4vP; _bl_uid=g6l2nf8gqRFoUCf52j0wg9ktCRb8; _gcl_au=1.1.1020666756.1680854480; _ga=GA1.2.303867327.1681100355; cto_axid=qVhKOIlvdz8gBvNCGiZJpvO9HK21W2gI; pdp_sfo=1; dsa_category_disclaimer=true; _gcl_aw=GCL.1684482945.CjwKCAjwvJyjBhApEiwAWz2nLbwA1YN9Bte0-xwC9cjeLL5W8bOre5Flpn6stpzXeN396SwHoXCH5BoCSaQQAvD_BwE; lzd_click_id=clkgl3rhh1h18ddqulh34f; _uetvid=a70580a0d8e511ed8ba31b9e895a65ea; cto_bundle=o16OV19JR2tsbmZUQjNSMlZXdU1nJTJCSnlJTkdsa0FqeHpOZjgwOHU5QyUyQkFzZzRFSU9MYmNwb1h0anY3bWozS1BIaiUyRkxQUnRoS3kzZURzazZKUnBiNjRMZ0hNSWJxVlNEYzc5QnFDNCUyQkswMXBNVTVQaTVEbXlYaDQ5SGxjSnZMS3ZMTWZOWGc1Nml6TE8wRGRkZmRlJTJCWlpYNkd3JTNEJTNE; AMCV_126E248D54200F960A4C98C6%40AdobeOrg=-1124106680%7CMCIDTS%7C19504%7CMCMID%7C52344899913288371074101774141992786277%7CMCAAMLH-1685675655%7C3%7CMCAAMB-1685675655%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCOPTOUT-1685078055s%7CNONE%7CvVersion%7C5.2.0; sgcookie=E100QFP%2BvohEo9oGTPL5SxXeeRlcKciHrMGTIHpFzH6Ej%2F7suAYVHgFLAVoD6SK81cStjX%2B6v7paxaoNBBGj4OBHTs%2FcXoQaMr2fPRFTDhLnV2A%3D; hng=VN|en|VND|704; userLanguageML=en; xlly_s=1; EGG_SESS=S_Gs1wHo9OvRHCMp98md7DUZe1_Oi7MP7VJBGrkx9fhSS7nQTnQJeqqXEBD6IWifQZY9APBbZ1NYv6XiRfJqEz8D_8i-At2bdf63doGKaAyOxhfxDvfM3CbBdWPewHrMN0lq3c1joYuwyaSJfxbUY4Skd9BkzmJRNg2pqZfhI3M=; "
            "ak_bmsc=366442E74FE13AEE7E9E22CBF35E72F3~000000000000000000000000000000~YAAQnwqrcV0uBmaIAQAAw7vqZxOadtWFkCIDKA3lr2K8jKVeIC2TUkasu4H6eKqmQuWeXkL9exGezkGcDBTQ0Osjf3qxB8vWzlGcSI9HHkP+1rA/LyLzgplq5luPovmO/seeTA9FpIpICoYIvBbPi9q6hpL0TfXYov3Wo4K8JXdxX5pJcy7s+tEaLEdXznfgGgjt9uFcRRZnsLasg9Ch5y3tnfajog3cQaZvMLi/NlRVddfH8v1wl175TKjerLWSxnLqiHushxvP+dM1VUTlzqpuZD8/5rYYtNRXgOCfvIPplv1UhjViv8pdoFtkizVYYBedlO+X1vXmISciWdiPG6ZREcJGzUFZTL4fpn6er4mwQqGdXN16Lpsbv2blc+7SfL8NExaTKT7n; lzd_sid=108de1144761220db8a1b3ec4f3d3855; _m_h5_tk=0eed2417b17557ffec7129878cb23744_1685379977546; _m_h5_tk_enc=bf0dad79f26dddd63250e7d71e0f4568; _tb_token_=fba9008553a37; t_sid=ZnaxX3Uxmr2XbTfOIbtN1nL4EFoWEXGD; utm_channel=NA; bm_sv=5662071E1A54AC7ED7E09096D4EF806D~YAAQnwqrcVuFB2aIAQAAQBVBaBNNYUHVTfW7H252VbTwWqy6HVSsrWwCibETEQQ0liYOnyxdWYwTnHBuHADoFf6UWx7lALZ3NbY4ET8AEMtpsakFkJ/pzDw91QjxrVUeOEF6umAN/tTD/mPAWUGmNPPLZ0jLwbLZsWH4RYlRfza8O/BQVzpD38dsFsAbSzhRj/4RAJioi23RAMhNmaeyIpFW/cjvFWjei4zBw6zueTW9qZP/lGqR5h+6Zv6LQaql~1; tfstk=cpWPBO2LQ8ey4EILWKvF7wNkTWvRZjIlxx-6ZrAhoXaor3Oli-gpmtj90Hiq-Qf..; l=fBjr2dWuTojiVbfXBOfwPurza77OSIRAguPzaNbMi9fP_8fp5g0hW1aR5989C3MNFspJR3l39SNMBeYBqCmLM4QoBKRS3xkmnRScdTf..; isg=BLS04uxkbJMHsv-7VdCYBfSBhXImjdh3aPJCk04VQD_CuVQDdp2oB2p3OelhQRDP"
        ),
        "x-csrf-token":"fba9008553a37",
        "accept": "application/json, text/plain, */*",
        "accept-language": "en-US,en;q=0.9",
        "x-requested-with": "XMLHttpRequest"
    }

    print(url)
    # Small non-blocking pause between page requests.
    await asyncio.sleep(0.1)

    # First choice: a plain HTTP request. Any failure (network error, block
    # page that is not JSON, unexpected JSON shape) falls through to the
    # browser-based path below.
    try:
        r = requests.get(url, headers=headers, timeout=30)
        info_item(json.loads(r.text), list_data, key1, key2, key3)
        return
    except Exception as exc:
        print("direct request failed, falling back to browser:", exc)

    # Fallback: load the page through a proxied browser, at most three times.
    for _ in range(3):
        body = await get_cookie(url)
        await asyncio.sleep(0.1)
        try:
            info_item(json.loads(body), list_data, key1, key2, key3)
            return
        except Exception as exc:
            print("browser response could not be parsed:", exc)

    # All three attempts failed: mark the row as tried and save the workbook.
    df.loc[index, 'chan'] = 'da thu 3 lan'
    df.loc[index,'sl'] =len(list_data)
    df.to_excel('./test.xlsx', index=False)
async def a(key, key1, key2,key3,index,category):
    """Crawl the first three result pages for one search term and store hits.

    Products from pages 1-3 of the Lazada tag listing for *key* are
    collected via ``get_data`` and de-duplicated by ``itemId``. Nothing is
    stored when fewer than 8 products were collected. When 8 to 34 were
    collected, every unique product is saved. When 35 or more were
    collected, only the 35 unique products with the highest
    ``itemSoldCntShow`` are saved. After saving, the spreadsheet row is
    marked in the ``crawl`` and ``sl`` columns and the workbook is written
    back to disk.

    Args:
        key: Search term; also slugified to build the tag URL.
        key1, key2, key3: Keyword filters forwarded to ``get_data``.
        index: Row label in the module-level ``df`` for this search.
        category: Category value passed to ``save_db``.
    """
    list_data = []

    key_slug = slugify(key, separator="-", lowercase=True)
    for page in range(1, 4):
        # Tag listing URL, used for terms that have no dedicated Lazada category.
        url = "https://www.lazada.vn/tag/{}/?ajax=true&catalog_redirect_tag=true&page={}&q={}".format(key_slug, page, key)
        await get_data(url, list_data, key1, key2, key3, index)
    print(len(list_data))

    # Keep the first occurrence of every itemId.
    unique_items = []
    unique_item_ids = set()
    for item in list_data:
        if item['itemId'] not in unique_item_ids:
            unique_item_ids.add(item['itemId'])
            unique_items.append(item)

    # NOTE(review): the thresholds are applied to the raw count, duplicates
    # included, as in the original — confirm whether the unique count was meant.
    collected = len(list_data)
    if collected < 8:
        return

    if collected < 35:
        items_to_save = unique_items
    else:
        sorted_list = sorted(unique_items, key=lambda entry: entry['itemSoldCntShow'], reverse=True)
        print(len(sorted_list))
        # Previously assigned to `top_25_items` but iterated as
        # `top_35_items`, which raised NameError on this branch.
        items_to_save = sorted_list[:35]

    for item in items_to_save:
        await save_db(item, category)

    # Mark the spreadsheet row as crawled and save the workbook.
    df.loc[index, 'crawl'] = 'da crawl'
    df.loc[index,'sl'] =len( unique_items)
    df.to_excel('./test.xlsx', index=False)

   


async def duyet_excel():
    """Run the crawl for every row of the workbook, one row after another."""
    # Columns read from each row, in the order a() expects them; the 'test'
    # column supplies the category argument.
    columns = ('key', 'key1', 'key2', 'key3', 'test')

    for row_label, record in df.iterrows():
        key, key1, key2, key3, category = (record[col] for col in columns)
        await a(key, key1, key2, key3, row_label, category)

# Create an event loop, run the whole crawl on it, and close the loop.
# asyncio.run() replaces the manual get_event_loop()/run_until_complete()/
# close() sequence; calling get_event_loop() with no running loop is
# deprecated in current Python versions.
asyncio.run(duyet_excel())