본문 바로가기

Python Programming/Projects

Automation System for Crawling Information from Sustainalytics

 

1. Imports

import pandas as pd
import numpy as np
import re
import requests
import bs4
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import *
from selenium.webdriver.common.keys import Keys
import time
data = pd.read_excel(r'C:\dataset\crawling_url_total_.xlsx')
data
  company main_url sub_url
0 Novo Nordisk A/S https://www.novonordisk.com/ https://www.novonordisk.com/sustainable-busine...
1 DSV Panalpina A/S https://www.dsv.com/en/about-dsv/coronavirus https://www.dsv.com/en/about-dsv/corporate-res...
2 Vestas Wind Systems A/S https://www.vestas.com/en/about/csr#!inclusive... https://www.vestas.com/en/investor/financial_r...
3 Orsted A/S https://orsted.com//en/about-us https://orsted.com/en/sustainability
4 Genmab A/S https://www.genmab.com/who-we-are/ https://ir.genmab.com/corporate-social-respons...
... ... ... ...
297 GrowGeneration Corp https://ir.growgeneration.com/company-information https://esg.censible.co/companies/GrowGenerati...
298 The Hershey Co https://www.hersheyland.com/home/about.html https://www.thehersheycompany.com/en_us/sustai...
299 PepsiCo Inc https://www.pepsico.com//about/diversity-equit... https://www.pepsico.com/sustainability/overview
300 Darling Ingredients Inc https://www.darlingii.com//about-us https://www.darlingii.com/csr#:~:text=Darling%...
301 Mondelez International https://www.mondelezinternational.com//About-Us https://www.mondelezinternational.com/Snacking...

302 rows × 3 columns

companies = data['company']
companies
0             Novo Nordisk A/S
1            DSV Panalpina A/S
2      Vestas Wind Systems A/S
3                   Orsted A/S
4                   Genmab A/S
                ...           
297        GrowGeneration Corp
298             The Hershey Co
299                PepsiCo Inc
300    Darling Ingredients Inc
301     Mondelez International
Name: company, Length: 302, dtype: object

2. Company Name Preprocessing

abbs = ['A/S', 'Corp', 'Inc', 'Ltd', 'PLC',
        'ADR', 'SA', 'Co', 'AG', 'SE', 'SpA',
        'S.A.', 'Inc.', 'NV', 'ASA', 'Group', 
        'Corporation', 'Company']

company_revised = []

for i in data['company']:
    splited = i.split(' ')
    for j in splited:
        if j in abbs:
            i = re.sub(j, '', i)
    i = i.rstrip()
    company_revised.append(i)

company_revised
['Novo Nordisk',
 'DSV Panalpina',
 'Vestas Wind Systems',
 'Orsted',
 'Genmab',
 'Carlsberg',
 'Coloplast',
 'Novozymes',
 'A. P. Moller Maersk',
 'Pandora',
 'MSCI',
 'Goldman Sachs FS Treasury Intms Instl',
 'Microsoft',
 'The Home Depot',
 'NVIDIA',
 'Hess',
 'BlackRock',
 'Morgan Stanley',
 'Honeywell International',
 "Lowe's Companies",
 'Vale',
 'BHP',
 'Rio Tinto',
 'Petroleo Brasileiro  Petrobras',
 'Fortescue Metals',
 'International Paper',
 'Tyson Foods',
 'Nutrien',
 'Archer-Daniels Midland',
 'Veolia Environnement',
 'Danaher',
 'Ecolab',
 'Ferguson',
 'Geberit',
 'Pentair',
 'Roper Technologies',
 'Waters',
 'American Water Works',
 'IDEX',
 'Itron',
 'Advanced Drainage Systems',
 'Xylem',
 'A.O. Smith',
 'The Toro',
 'Royal Bank of Canada',
 'Shopify',
 'The Toronto-Dominion Bank',
 'Enbridge',
 'Bank of Nova Scotia',
 'Canadian National Railway',
 'Brookfield Asset Management',
 'Bank of Montreal',
 'Canadian Pacific Railway',
 'TC Energy',
 'Investor',
 'Nordea Bank Abp',
 'Atlas Copco',
 'Telefonaktiebolaget L M Ericsson',
 'Volvo',
 'KONE Oyj',
 'Nokia Oyj',
 'Aptiv',
 'Johnson Controls International',
 'Eaton',
 'ABB',
 'Schneider Electric',
 'Enphase Energy',
 'Samsung SDI',
 'SolarEdge Technologies',
 'Prysmian',
 'Terna',
 'Canadian Imperial Bank of Commerce',
 'Essential Utilities',
 'AstraZeneca',
 'Unilever',
 'HSBC Holdings',
 'Diageo',
 'GlaxoSmithKline',
 'Rio Tinto',
 'BP',
 'British American Tobacco',
 'Royal Dutch Shell',
 'Welltower',
 'Ventas',
 'Healthpeak Properties',
 'Omega Healthcare Investors',
 'Orpea',
 'Amedisys',
 'LHC',
 'Ensign',
 'Aedifica',
 'Sabra Health Care REIT',
 'Alphabet',
 'Visa',
 'The Walt Disney',
 'Salesforce.com',
 'Tesla',
 'Intel',
 'Procter & Gamble',
 'ASML Holding',
 'Roche Holding',
 'Nestle',
 'AIA',
 'Allianz',
 'Wesfarmers',
 'Hong Kong Exchanges and Clearing',
 'ITOCHU Techno-Solutions',
 'Admiral',
 'MS&AD Insurance  Holdings',
 'Henkel',
 'RELX',
 'Croda International',
 'Novartis',
 'Scout24',
 'Royal Mail',
 'Investec',
 'BT',
 'Morrison (Wm) Supermarkets',
 'Glencore',
 'EVRAZ',
 'Ashtead',
 'Entain',
 'B&M European Value Retail',
 'Spectris',
 'Equinor',
 'DNB',
 'Telenor',
 'Mowi',
 'Yara International',
 'Norsk Hydro',
 'Orkla',
 'Tomra Systems',
 'Adevinta',
 'Gjensidige Forsikring',
 'Dj U.S. Semiconductors Index Swap Societe Generale',
 'Broadcom',
 'Texas Instruments',
 'Qualcomm',
 'Wipro',
 'Akzo Nobel',
 'Magna International',
 'Vivendi',
 'Tokyo Electron',
 'Nippon Telegraph & Telephone',
 'Mitsubishi',
 'Power  of Canada',
 'Commonwealth Bank of Australia',
 'BHP',
 'CSL',
 'Westpac Banking',
 'National Australia Bank',
 'Australia and New Zealand Banking',
 'Macquarie',
 'Woolworths',
 'Rio Tinto',
 'LVMH Moet Hennessy Louis Vuitton',
 'Toyota Motor',
 'SAP',
 'Berkshire Hathaway',
 'JPMorgan Chase &',
 'Bank of America',
 'Wells Fargo &',
 'Citigroup',
 'Goldman Sachs',
 'Sony',
 "L'Oreal",
 'Keyence',
 'Siemens',
 'Daikin Industries',
 'AXA',
 'CRH',
 'Flutter Entertainment',
 'Kerry',
 'Kingspan',
 'Smurfit Kappa',
 'Ryanair Holdings',
 'Grafton   Shs',
 'Icon',
 'Glanbia',
 'Bank of Ireland',
 'Intermediate Capital',
 'Smith (DS)',
 'Rightmove',
 'Howden Joinery',
 'Electrocomponents',
 'Weir',
 'Pennon',
 'Dechra Pharmaceuticals',
 'IMI',
 'Barclays',
 'Emera',
 'AerCap Holdings N.V.',
 'Wells Fargo &',
 'JPMorgan Chase & Co.',
 'Societe Generale',
 'Credit Suisse',
 'Enbridge Incorporation',
 'Lloyds Banking  plc',
 'Global Atlantic Finance',
 'Assa Abloy',
 'Hexagon',
 'Sandvik',
 'Evolution AB',
 'Skandinaviska Enskilda Banken',
 'UPM-Kymmene Oyj',
 'Avery Dennison',
 'Amcor  Ordinary Shares',
 'Weyerhaeuser',
 'Mondi',
 'Packaging  of America',
 'WestRock',
 'Stora Enso Oyj',
 "Land O'Lakes",
 'Australia and New Zealand Banking',
 'Assured Guaranty Municipal Holdings',
 'Markel',
 'Dominion Energy',
 'The Bank of New York Mellon',
 'Wells Fargo &',
 'Duke Energy',
 'Citigroup',
 'HSBC Capital',
 'TransCanada Trust',
 'Bank of America',
 'Sumitomo Life Insurance',
 'MetLife',
 'NatWest  plc',
 'Nordea Bank ABP',
 'Credit Agricole',
 'Societe Generale',
 'HSBC Holdings plc',
 'ING Groep N.V.',
 'UBS  Funding (Switzerland)',
 'Standard Chartered plc',
 'AT&T',
 'Computershare',
 'Brother Industries',
 'Daiwa House Industry',
 'Koninklijke Ahold Delhaize',
 'Lawson',
 'T&D Holdings',
 'ITOCHU',
 'Toyota Tsusho',
 'AGL Energy',
 'Telefonica',
 'BNP Paribas',
 'Banco Santander',
 'Zurich Insurance',
 'UBS',
 'ING Groep',
 'Prudential',
 'Lloyds Banking',
 'Prosus',
 'Koninklijke Philips',
 'Adyen',
 'Koninklijke DSM',
 'Wolters Kluwer',
 'Heineken',
 'BlackRock Cash Funds Treasury SL Agency',
 'Bank of Nova Scotia',
 'Avangrid',
 'Target',
 'Ingersoll-Rand Global Holdings  Limited',
 'Banco Bilbao Vizcaya Argentaria,',
 'Target',
 'Kellogg',
 'Automatic Data Processing,',
 'Sands China',
 'NextEra Energy',
 'Exelon',
 'Public Service Enterprise',
 'Sempra Energy',
 'Alliant Energy',
 'CMS Energy',
 'Xcel Energy',
 'Fortis',
 'NiSource',
 'Northrop Grumman',
 'General Dynamics',
 'Keurig Dr Pepper',
 'Altria',
 'Hormel Foods',
 'Nucor',
 'Vulcan Materials',
 'Conagra Brands',
 'Clorox',
 'Taiwan Semiconductor Manufacturing',
 'ASML Holding',
 'Advanced Micro Devices',
 'Micron Technology',
 'Applied Materials',
 'Marvell Technology',
 'Analog Devices',
 'KLA',
 'JM Smucker',
 'General Mills',
 'Bunge',
 'GrowGeneration',
 'The Hershey',
 'PepsiCo',
 'Darling Ingredients',
 'Mondelez International']

3. Define Function (Using Selenium Chrome Driver)

rating_num_lst = []
rating_text_lst = []
industry_lst = []
industry_rank_lst = []
company_original_name = []
company_lst = []
company_site_lst = []


def sustainalytics(company):
    driver.get(search_tool)
    input_element = driver.find_element_by_css_selector('#searchInput')
    input_element.send_keys(company)
    time.sleep(2)
    click_element = driver.find_element_by_css_selector('#searchResults > div > div')
    click_element.click()

    rating_num_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/span'
    rating_text_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[2]/span'
    industry_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/strong'
    industry_rank_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/span'
    company_site_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[1]/div/h2'

    rating_num = driver.find_element_by_xpath(rating_num_xpath).text    
    rating_text = driver.find_element_by_xpath(rating_text_xpath).text
    rating_text = re.sub('\n', ' ', rating_text)
    industry = driver.find_element_by_xpath(industry_xpath).text
    industry_rank = driver.find_element_by_xpath(industry_rank_xpath).text
    industry_rank = re.sub(" out of ", "/", industry_rank)
    company_site = driver.find_element_by_xpath(company_site_xpath).text

    company_original_name.append(companies[num])
    company_lst.append(company)
    company_site_lst.append(company_site)
    rating_num_lst.append(rating_num)
    rating_text_lst.append(rating_text)
    industry_lst.append(industry)
    industry_rank_lst.append(industry_rank)
error = []

driver = webdriver.Chrome(r'C:\Users\esthe\chromedriver.exe')
search_tool = 'https://www.sustainalytics.com/esg-ratings/?utm_term=&utm_campaign=Leads-Search-20&utm_source=adwords&utm_medium=ppc&hsa_acc=4619360780&hsa_cam=11145778763&hsa_grp=108965194933&hsa_ad=514798435870&hsa_src=g&hsa_tgt=dsa-437115340933&hsa_kw=&hsa_mt=b&hsa_net=adwords&hsa_ver=3&gclid=CjwKCAjwjJmIBhA4EiwAQdCbxte5HszKf3XYqp6OXKKctLCPuuJMvdgh3IcVVHgKlauc2WKjO1p5YRoCl6UQAvD_BwE'

num = 0

for comp in company_revised:
    try:
        sustainalytics(comp)
        num += 1
    except NoSuchElementException:
        print(comp)
        error.append(comp)
        continue
        nunm += 1
A. P. Moller Maersk
MSCI
Goldman Sachs FS Treasury Intms Instl
Lowe's Companies
Petroleo Brasileiro  Petrobras
Archer-Daniels Midland
Geberit
Advanced Drainage Systems
A.O. Smith
Bank of Montreal
Atlas Copco
Telefonaktiebolaget L M Ericsson
Amedisys
Ensign
Sabra Health Care REIT
Hong Kong Exchanges and Clearing
Novartis
Morrison (Wm) Supermarkets
Adevinta
Dj U.S. Semiconductors Index Swap Societe Generale
Power  of Canada
Westpac Banking
Australia and New Zealand Banking
LVMH Moet Hennessy Louis Vuitton
Keyence
Grafton   Shs
Smith (DS)
Rightmove
Enbridge Incorporation
Lloyds Banking  plc
Global Atlantic Finance
Amcor  Ordinary Shares
Packaging  of America
Stora Enso Oyj
Land O'Lakes
Australia and New Zealand Banking
Assured Guaranty Municipal Holdings
HSBC Capital
TransCanada Trust
Sumitomo Life Insurance
NatWest  plc
UBS  Funding (Switzerland)
Lloyds Banking
Koninklijke DSM
BlackRock Cash Funds Treasury SL Agency
Ingersoll-Rand Global Holdings  Limited
Banco Bilbao Vizcaya Argentaria,
Automatic Data Processing,
Xcel Energy
Fortis
NiSource
Keurig Dr Pepper
Marvell Technology
JM Smucker
General Mills
GrowGeneration
Darling Ingredients
num = 0
for i in error:
    print(num, i)
    num += 1
0 A. P. Moller Maersk
1 MSCI
2 Goldman Sachs FS Treasury Intms Instl
3 Lowe's Companies
4 Petroleo Brasileiro  Petrobras
5 Archer-Daniels Midland
6 Geberit
7 Advanced Drainage Systems
8 A.O. Smith
9 Bank of Montreal
10 Atlas Copco
11 Telefonaktiebolaget L M Ericsson
12 Amedisys
13 Ensign
14 Sabra Health Care REIT
15 Hong Kong Exchanges and Clearing
16 Novartis
17 Morrison (Wm) Supermarkets
18 Adevinta
19 Dj U.S. Semiconductors Index Swap Societe Generale
20 Power  of Canada
21 Westpac Banking
22 Australia and New Zealand Banking
23 LVMH Moet Hennessy Louis Vuitton
24 Keyence
25 Grafton   Shs
26 Smith (DS)
27 Rightmove
28 Enbridge Incorporation
29 Lloyds Banking  plc
30 Global Atlantic Finance
31 Amcor  Ordinary Shares
32 Packaging  of America
33 Stora Enso Oyj
34 Land O'Lakes
35 Australia and New Zealand Banking
36 Assured Guaranty Municipal Holdings
37 HSBC Capital
38 TransCanada Trust
39 Sumitomo Life Insurance
40 NatWest  plc
41 UBS  Funding (Switzerland)
42 Lloyds Banking
43 Koninklijke DSM
44 BlackRock Cash Funds Treasury SL Agency
45 Ingersoll-Rand Global Holdings  Limited
46 Banco Bilbao Vizcaya Argentaria,
47 Automatic Data Processing,
48 Xcel Energy
49 Fortis
50 NiSource
51 Keurig Dr Pepper
52 Marvell Technology
53 JM Smucker
54 General Mills
55 GrowGeneration
56 Darling Ingredients

4. Manually Re-name Companies

error[0] = 'A.P. Møller'
error[2] = 'Goldman Sachs'
error[3] = 'Lowe\'s'
error[4] = 'Petroleo Brasileiro'
error[5] = 'Archer'
error[8] = 'A. O. Smith'
error[11] = 'Telefonaktiebolaget'
error[15] = 'Hong Kong Exchanges'
error[17] = 'Morrison'
error[19] = 'Societe Generale'
error[20] = 'Power Corp. of Canada'
error[22] = 'Australia & New Zealand'
error[25] = 'Grafton'
error[26] = 'DS Smith'
error[28] = 'Enbridge'
error[29] = 'Lloyds Banking'
error[31] = 'Amcor'
error[32] = 'Packaging Corporation of America'
error[33] = 'Stora Enso'
error[35] = 'ASX:ANZ'
error[36] = 'Assured Guaranty'
error[37] = 'HSBC'
error[40] = 'NatWest'
error[41] = 'UBS Group'
error[44] = 'BlackRock'
error[45] = 'Ingersoll Rand',
error[46] = 'Banco Bilbao'
error[53] = 'The J. M. Smucker'

5. Another Loop for Remaining Companies

def sustainalytics(company):
    driver.get(search_tool)
    input_element = driver.find_element_by_css_selector('#searchInput')
    input_element.send_keys(company)
    time.sleep(3)
    click_element = driver.find_element_by_css_selector('#searchResults > div > div')
    click_element.click()

    rating_num_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/span'
    rating_text_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[2]/span'
    industry_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/strong'
    industry_rank_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/span'
    company_site_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[1]/div/h2'

    rating_num = driver.find_element_by_xpath(rating_num_xpath).text    
    rating_text = driver.find_element_by_xpath(rating_text_xpath).text
    rating_text = re.sub('\n', ' ', rating_text)
    industry = driver.find_element_by_xpath(industry_xpath).text
    industry_rank = driver.find_element_by_xpath(industry_rank_xpath).text
    industry_rank = re.sub(" out of ", "/", industry_rank)
    company_site = driver.find_element_by_xpath(company_site_xpath).text

    company_lst.append(company)
    company_site_lst.append(company_site)
    rating_num_lst.append(rating_num)
    rating_text_lst.append(rating_text)
    industry_lst.append(industry)
    industry_rank_lst.append(industry_rank)
error2 = []

driver = webdriver.Chrome(r'C:\Users\esthe\chromedriver.exe')
search_tool = 'https://www.sustainalytics.com/esg-ratings/?utm_term=&utm_campaign=Leads-Search-20&utm_source=adwords&utm_medium=ppc&hsa_acc=4619360780&hsa_cam=11145778763&hsa_grp=108965194933&hsa_ad=514798435870&hsa_src=g&hsa_tgt=dsa-437115340933&hsa_kw=&hsa_mt=b&hsa_net=adwords&hsa_ver=3&gclid=CjwKCAjwjJmIBhA4EiwAQdCbxte5HszKf3XYqp6OXKKctLCPuuJMvdgh3IcVVHgKlauc2WKjO1p5YRoCl6UQAvD_BwE'

for comp in error:
    try:
        sustainalytics(comp)

    except NoSuchElementException:
        print(comp)
        error2.append(comp)
        continue
MSCI
Advanced Drainage Systems
Amedisys
Ensign
Sabra Health Care REIT
Global Atlantic Finance
Land O'Lakes
TransCanada Trust
Sumitomo Life Insurance
Automatic Data Processing,
GrowGeneration
Darling Ingredients
sustainalytics('Automatic Data Processing')
del error2[9]
data_final = {'company' : company_lst,
              'company_site' : company_site_lst,
              'rating_num' : rating_num_lst,
              'rating_text' : rating_text_lst, 
              'industry' : industry_lst, 
              'industry_rank' : industry_rank_lst}
df = pd.DataFrame(data_final)

df
  company company_site rating_num rating_text industry industry_rank
0 Novo Nordisk Novo Nordisk A/S 21.9 Medium Risk Pharmaceuticals 54/862
1 DSV Panalpina DSV Panalpina A/S 17.0 Low Risk Transportation 29/330
2 Vestas Wind Systems Vestas Wind Systems A/S 14.8 Low Risk Electrical Equipment 4/180
3 Orsted Ørsted A/S 20.5 Medium Risk Utilities 51/593
4 Genmab Genmab A/S 27.2 Medium Risk Pharmaceuticals 120/862
... ... ... ... ... ... ...
286 Keurig Dr Pepper Keurig Dr Pepper, Inc. 22.7 Medium Risk Food Products 43/550
287 Marvell Technology Marvell Technology Group Ltd. 20.1 Medium Risk Semiconductors 35/252
288 The J. M. Smucker The J. M. Smucker Co. 26.8 Medium Risk Food Products 87/550
289 General Mills General Mills Inc 21.4 Medium Risk Food Products 32/550
290 Automatic Data Processing Automatic Data Processing Inc 13.7 Low Risk Software & Services 21/805

291 rows × 6 columns

df.loc[151]
company                                                  Icon
company_site     Taiwan Semiconductor Manufacturing Co., Ltd.
rating_num                                               14.4
rating_text                                          Low Risk
industry                                       Semiconductors
industry_rank                                           4/252
Name: 151, dtype: object

6. Check for Searching Errors and Re-identify

df = df.drop(32) #Itron
df = df.drop(34) #The Toro
df = df.drop(44) #Investor
df = df.drop(58) #Terna
df = df.drop(67) #BP
df = df.drop(78) #Visa
df = df.drop(87) #AIA
df = df.drop(92) #MS&AD Insurance  Holdings
df = df.drop(104) #B&M European Value Retail
df = df.drop(127) #CSL
df = df.drop(151) #Icon
df = df.drop(160) #IMI
df = df.drop(176) #Mondi
df = df.drop(192) #AT&T
df = df.drop(198) #T&D Holdings
df = df.drop(206) #UBS
last_try = ['The Toro Co', 'Investor AB', 'BP p.l.c.', 'Visa, Inc.',
           'AIA Group', 'CSL Ltd.', 'IMI PLC', 'Mondi PLC', 'UBS Group']

error3 = []

driver = webdriver.Chrome(r'C:\Users\esthe\chromedriver.exe')
search_tool = 'https://www.sustainalytics.com/esg-ratings/?utm_term=&utm_campaign=Leads-Search-20&utm_source=adwords&utm_medium=ppc&hsa_acc=4619360780&hsa_cam=11145778763&hsa_grp=108965194933&hsa_ad=514798435870&hsa_src=g&hsa_tgt=dsa-437115340933&hsa_kw=&hsa_mt=b&hsa_net=adwords&hsa_ver=3&gclid=CjwKCAjwjJmIBhA4EiwAQdCbxte5HszKf3XYqp6OXKKctLCPuuJMvdgh3IcVVHgKlauc2WKjO1p5YRoCl6UQAvD_BwE'

for comp in last_try:
    try:
        sustainalytics(comp)

    except NoSuchElementException:
        print(comp)
        error3.append(comp)
        continue

7. Final Missing Compaies

final_missing_companies = error2 + ['Itron', 'Terna', 'MS&AD Insurance  Holdings',
                                    'B&M European Value Retail', 'Icon', 'AT&T', 'T&D Holdings']
final_missing_companies
['MSCI',
 'Advanced Drainage Systems',
 'Amedisys',
 'Ensign',
 'Sabra Health Care REIT',
 'Global Atlantic Finance',
 "Land O'Lakes",
 'TransCanada Trust',
 'Sumitomo Life Insurance',
 'GrowGeneration',
 'Darling Ingredients',
 'Itron',
 'Terna',
 'MS&AD Insurance  Holdings',
 'B&M European Value Retail',
 'Icon',
 'AT&T',
 'T&D Holdings']

8. Convert DataFrame to CSV

df.to_csv(r'C:\dataset\sustainalytics_esg_risk_rating.csv')
data_final2 = {'company' : company_lst,
              'company_site' : company_site_lst,
              'rating_num' : rating_num_lst,
              'rating_text' : rating_text_lst, 
              'industry' : industry_lst, 
              'industry_rank' : industry_rank_lst}

df2 = pd.DataFrame(data_final2)

df2 = df2.drop(32) #Itron
d2f = df2.drop(34) #The Toro
df2 = df2.drop(44) #Investor
df2 = df2.drop(58) #Terna
df2 = df2.drop(67) #BP
df2 = df2.drop(78) #Visa
df2 = df2.drop(87) #AIA
df2 = df2.drop(92) #MS&AD Insurance  Holdings
df2 = df2.drop(104) #B&M European Value Retail
df2 = df2.drop(127) #CSL
df2 = df2.drop(151) #Icon
df2 = df2.drop(160) #IMI
df2 = df2.drop(176) #Mondi
df2 = df2.drop(192) #AT&T
df2 = df2.drop(198) #T&D Holdings
df2 = df2.drop(206) #UBS

df2 = df2.reset_index()
df2
  index company company_site rating_num rating_text industry industry_rank
0 0 Novo Nordisk Novo Nordisk A/S 21.9 Medium Risk Pharmaceuticals 54/862
1 1 DSV Panalpina DSV Panalpina A/S 17.0 Low Risk Transportation 29/330
2 2 Vestas Wind Systems Vestas Wind Systems A/S 14.8 Low Risk Electrical Equipment 4/180
3 3 Orsted Ørsted A/S 20.5 Medium Risk Utilities 51/593
4 4 Genmab Genmab A/S 27.2 Medium Risk Pharmaceuticals 120/862
... ... ... ... ... ... ... ...
280 295 AIA Group AIA Group Ltd. 12.9 Low Risk Insurance 4/276
281 296 CSL Ltd. CSL Ltd. 25.2 Medium Risk Pharmaceuticals 89/862
282 297 IMI PLC IMI PLC 26.6 Medium Risk Machinery 69/463
283 298 Mondi PLC Mondi PLC 11.8 Low Risk Paper & Forestry 1/71
284 299 UBS Group UBS Group AG 22.9 Medium Risk Diversified Financials 161/712

285 rows × 7 columns

del df2['index']
df2
  company company_site rating_num rating_text industry industry_rank
0 Novo Nordisk Novo Nordisk A/S 21.9 Medium Risk Pharmaceuticals 54/862
1 DSV Panalpina DSV Panalpina A/S 17.0 Low Risk Transportation 29/330
2 Vestas Wind Systems Vestas Wind Systems A/S 14.8 Low Risk Electrical Equipment 4/180
3 Orsted Ørsted A/S 20.5 Medium Risk Utilities 51/593
4 Genmab Genmab A/S 27.2 Medium Risk Pharmaceuticals 120/862
... ... ... ... ... ... ...
280 AIA Group AIA Group Ltd. 12.9 Low Risk Insurance 4/276
281 CSL Ltd. CSL Ltd. 25.2 Medium Risk Pharmaceuticals 89/862
282 IMI PLC IMI PLC 26.6 Medium Risk Machinery 69/463
283 Mondi PLC Mondi PLC 11.8 Low Risk Paper & Forestry 1/71
284 UBS Group UBS Group AG 22.9 Medium Risk Diversified Financials 161/712

285 rows × 6 columns

df2.to_csv(r'C:\dataset\sustainalytics_esg_risk_rating.csv')