1. Imports
import pandas as pd
import numpy as np
import re
import requests
import bs4
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import *
from selenium.webdriver.common.keys import Keys
import time
data = pd.read_excel(r'C:\dataset\crawling_url_total_.xlsx')
data
company | main_url | sub_url | |
---|---|---|---|
0 | Novo Nordisk A/S | https://www.novonordisk.com/ | https://www.novonordisk.com/sustainable-busine... |
1 | DSV Panalpina A/S | https://www.dsv.com/en/about-dsv/coronavirus | https://www.dsv.com/en/about-dsv/corporate-res... |
2 | Vestas Wind Systems A/S | https://www.vestas.com/en/about/csr#!inclusive... | https://www.vestas.com/en/investor/financial_r... |
3 | Orsted A/S | https://orsted.com//en/about-us | https://orsted.com/en/sustainability |
4 | Genmab A/S | https://www.genmab.com/who-we-are/ | https://ir.genmab.com/corporate-social-respons... |
... | ... | ... | ... |
297 | GrowGeneration Corp | https://ir.growgeneration.com/company-information | https://esg.censible.co/companies/GrowGenerati... |
298 | The Hershey Co | https://www.hersheyland.com/home/about.html | https://www.thehersheycompany.com/en_us/sustai... |
299 | PepsiCo Inc | https://www.pepsico.com//about/diversity-equit... | https://www.pepsico.com/sustainability/overview |
300 | Darling Ingredients Inc | https://www.darlingii.com//about-us | https://www.darlingii.com/csr#:~:text=Darling%... |
301 | Mondelez International | https://www.mondelezinternational.com//About-Us | https://www.mondelezinternational.com/Snacking... |
302 rows × 3 columns
companies = data['company']
companies
0 Novo Nordisk A/S
1 DSV Panalpina A/S
2 Vestas Wind Systems A/S
3 Orsted A/S
4 Genmab A/S
...
297 GrowGeneration Corp
298 The Hershey Co
299 PepsiCo Inc
300 Darling Ingredients Inc
301 Mondelez International
Name: company, Length: 302, dtype: object
2. Company Name Preprocessing
abbs = ['A/S', 'Corp', 'Inc', 'Ltd', 'PLC',
'ADR', 'SA', 'Co', 'AG', 'SE', 'SpA',
'S.A.', 'Inc.', 'NV', 'ASA', 'Group',
'Corporation', 'Company']
company_revised = []
for i in data['company']:
splited = i.split(' ')
for j in splited:
if j in abbs:
i = re.sub(j, '', i)
i = i.rstrip()
company_revised.append(i)
company_revised
['Novo Nordisk',
'DSV Panalpina',
'Vestas Wind Systems',
'Orsted',
'Genmab',
'Carlsberg',
'Coloplast',
'Novozymes',
'A. P. Moller Maersk',
'Pandora',
'MSCI',
'Goldman Sachs FS Treasury Intms Instl',
'Microsoft',
'The Home Depot',
'NVIDIA',
'Hess',
'BlackRock',
'Morgan Stanley',
'Honeywell International',
"Lowe's Companies",
'Vale',
'BHP',
'Rio Tinto',
'Petroleo Brasileiro Petrobras',
'Fortescue Metals',
'International Paper',
'Tyson Foods',
'Nutrien',
'Archer-Daniels Midland',
'Veolia Environnement',
'Danaher',
'Ecolab',
'Ferguson',
'Geberit',
'Pentair',
'Roper Technologies',
'Waters',
'American Water Works',
'IDEX',
'Itron',
'Advanced Drainage Systems',
'Xylem',
'A.O. Smith',
'The Toro',
'Royal Bank of Canada',
'Shopify',
'The Toronto-Dominion Bank',
'Enbridge',
'Bank of Nova Scotia',
'Canadian National Railway',
'Brookfield Asset Management',
'Bank of Montreal',
'Canadian Pacific Railway',
'TC Energy',
'Investor',
'Nordea Bank Abp',
'Atlas Copco',
'Telefonaktiebolaget L M Ericsson',
'Volvo',
'KONE Oyj',
'Nokia Oyj',
'Aptiv',
'Johnson Controls International',
'Eaton',
'ABB',
'Schneider Electric',
'Enphase Energy',
'Samsung SDI',
'SolarEdge Technologies',
'Prysmian',
'Terna',
'Canadian Imperial Bank of Commerce',
'Essential Utilities',
'AstraZeneca',
'Unilever',
'HSBC Holdings',
'Diageo',
'GlaxoSmithKline',
'Rio Tinto',
'BP',
'British American Tobacco',
'Royal Dutch Shell',
'Welltower',
'Ventas',
'Healthpeak Properties',
'Omega Healthcare Investors',
'Orpea',
'Amedisys',
'LHC',
'Ensign',
'Aedifica',
'Sabra Health Care REIT',
'Alphabet',
'Visa',
'The Walt Disney',
'Salesforce.com',
'Tesla',
'Intel',
'Procter & Gamble',
'ASML Holding',
'Roche Holding',
'Nestle',
'AIA',
'Allianz',
'Wesfarmers',
'Hong Kong Exchanges and Clearing',
'ITOCHU Techno-Solutions',
'Admiral',
'MS&AD Insurance Holdings',
'Henkel',
'RELX',
'Croda International',
'Novartis',
'Scout24',
'Royal Mail',
'Investec',
'BT',
'Morrison (Wm) Supermarkets',
'Glencore',
'EVRAZ',
'Ashtead',
'Entain',
'B&M European Value Retail',
'Spectris',
'Equinor',
'DNB',
'Telenor',
'Mowi',
'Yara International',
'Norsk Hydro',
'Orkla',
'Tomra Systems',
'Adevinta',
'Gjensidige Forsikring',
'Dj U.S. Semiconductors Index Swap Societe Generale',
'Broadcom',
'Texas Instruments',
'Qualcomm',
'Wipro',
'Akzo Nobel',
'Magna International',
'Vivendi',
'Tokyo Electron',
'Nippon Telegraph & Telephone',
'Mitsubishi',
'Power of Canada',
'Commonwealth Bank of Australia',
'BHP',
'CSL',
'Westpac Banking',
'National Australia Bank',
'Australia and New Zealand Banking',
'Macquarie',
'Woolworths',
'Rio Tinto',
'LVMH Moet Hennessy Louis Vuitton',
'Toyota Motor',
'SAP',
'Berkshire Hathaway',
'JPMorgan Chase &',
'Bank of America',
'Wells Fargo &',
'Citigroup',
'Goldman Sachs',
'Sony',
"L'Oreal",
'Keyence',
'Siemens',
'Daikin Industries',
'AXA',
'CRH',
'Flutter Entertainment',
'Kerry',
'Kingspan',
'Smurfit Kappa',
'Ryanair Holdings',
'Grafton Shs',
'Icon',
'Glanbia',
'Bank of Ireland',
'Intermediate Capital',
'Smith (DS)',
'Rightmove',
'Howden Joinery',
'Electrocomponents',
'Weir',
'Pennon',
'Dechra Pharmaceuticals',
'IMI',
'Barclays',
'Emera',
'AerCap Holdings N.V.',
'Wells Fargo &',
'JPMorgan Chase & Co.',
'Societe Generale',
'Credit Suisse',
'Enbridge Incorporation',
'Lloyds Banking plc',
'Global Atlantic Finance',
'Assa Abloy',
'Hexagon',
'Sandvik',
'Evolution AB',
'Skandinaviska Enskilda Banken',
'UPM-Kymmene Oyj',
'Avery Dennison',
'Amcor Ordinary Shares',
'Weyerhaeuser',
'Mondi',
'Packaging of America',
'WestRock',
'Stora Enso Oyj',
"Land O'Lakes",
'Australia and New Zealand Banking',
'Assured Guaranty Municipal Holdings',
'Markel',
'Dominion Energy',
'The Bank of New York Mellon',
'Wells Fargo &',
'Duke Energy',
'Citigroup',
'HSBC Capital',
'TransCanada Trust',
'Bank of America',
'Sumitomo Life Insurance',
'MetLife',
'NatWest plc',
'Nordea Bank ABP',
'Credit Agricole',
'Societe Generale',
'HSBC Holdings plc',
'ING Groep N.V.',
'UBS Funding (Switzerland)',
'Standard Chartered plc',
'AT&T',
'Computershare',
'Brother Industries',
'Daiwa House Industry',
'Koninklijke Ahold Delhaize',
'Lawson',
'T&D Holdings',
'ITOCHU',
'Toyota Tsusho',
'AGL Energy',
'Telefonica',
'BNP Paribas',
'Banco Santander',
'Zurich Insurance',
'UBS',
'ING Groep',
'Prudential',
'Lloyds Banking',
'Prosus',
'Koninklijke Philips',
'Adyen',
'Koninklijke DSM',
'Wolters Kluwer',
'Heineken',
'BlackRock Cash Funds Treasury SL Agency',
'Bank of Nova Scotia',
'Avangrid',
'Target',
'Ingersoll-Rand Global Holdings Limited',
'Banco Bilbao Vizcaya Argentaria,',
'Target',
'Kellogg',
'Automatic Data Processing,',
'Sands China',
'NextEra Energy',
'Exelon',
'Public Service Enterprise',
'Sempra Energy',
'Alliant Energy',
'CMS Energy',
'Xcel Energy',
'Fortis',
'NiSource',
'Northrop Grumman',
'General Dynamics',
'Keurig Dr Pepper',
'Altria',
'Hormel Foods',
'Nucor',
'Vulcan Materials',
'Conagra Brands',
'Clorox',
'Taiwan Semiconductor Manufacturing',
'ASML Holding',
'Advanced Micro Devices',
'Micron Technology',
'Applied Materials',
'Marvell Technology',
'Analog Devices',
'KLA',
'JM Smucker',
'General Mills',
'Bunge',
'GrowGeneration',
'The Hershey',
'PepsiCo',
'Darling Ingredients',
'Mondelez International']
3. Define Function (Using Selenium Chrome Driver)
rating_num_lst = []
rating_text_lst = []
industry_lst = []
industry_rank_lst = []
company_original_name = []
company_lst = []
company_site_lst = []
def sustainalytics(company):
driver.get(search_tool)
input_element = driver.find_element_by_css_selector('#searchInput')
input_element.send_keys(company)
time.sleep(2)
click_element = driver.find_element_by_css_selector('#searchResults > div > div')
click_element.click()
rating_num_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/span'
rating_text_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[2]/span'
industry_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/strong'
industry_rank_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/span'
company_site_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[1]/div/h2'
rating_num = driver.find_element_by_xpath(rating_num_xpath).text
rating_text = driver.find_element_by_xpath(rating_text_xpath).text
rating_text = re.sub('\n', ' ', rating_text)
industry = driver.find_element_by_xpath(industry_xpath).text
industry_rank = driver.find_element_by_xpath(industry_rank_xpath).text
industry_rank = re.sub(" out of ", "/", industry_rank)
company_site = driver.find_element_by_xpath(company_site_xpath).text
company_original_name.append(companies[num])
company_lst.append(company)
company_site_lst.append(company_site)
rating_num_lst.append(rating_num)
rating_text_lst.append(rating_text)
industry_lst.append(industry)
industry_rank_lst.append(industry_rank)
error = []
driver = webdriver.Chrome(r'C:\Users\esthe\chromedriver.exe')
search_tool = 'https://www.sustainalytics.com/esg-ratings/?utm_term=&utm_campaign=Leads-Search-20&utm_source=adwords&utm_medium=ppc&hsa_acc=4619360780&hsa_cam=11145778763&hsa_grp=108965194933&hsa_ad=514798435870&hsa_src=g&hsa_tgt=dsa-437115340933&hsa_kw=&hsa_mt=b&hsa_net=adwords&hsa_ver=3&gclid=CjwKCAjwjJmIBhA4EiwAQdCbxte5HszKf3XYqp6OXKKctLCPuuJMvdgh3IcVVHgKlauc2WKjO1p5YRoCl6UQAvD_BwE'
num = 0
for comp in company_revised:
try:
sustainalytics(comp)
num += 1
except NoSuchElementException:
print(comp)
error.append(comp)
continue
nunm += 1
A. P. Moller Maersk
MSCI
Goldman Sachs FS Treasury Intms Instl
Lowe's Companies
Petroleo Brasileiro Petrobras
Archer-Daniels Midland
Geberit
Advanced Drainage Systems
A.O. Smith
Bank of Montreal
Atlas Copco
Telefonaktiebolaget L M Ericsson
Amedisys
Ensign
Sabra Health Care REIT
Hong Kong Exchanges and Clearing
Novartis
Morrison (Wm) Supermarkets
Adevinta
Dj U.S. Semiconductors Index Swap Societe Generale
Power of Canada
Westpac Banking
Australia and New Zealand Banking
LVMH Moet Hennessy Louis Vuitton
Keyence
Grafton Shs
Smith (DS)
Rightmove
Enbridge Incorporation
Lloyds Banking plc
Global Atlantic Finance
Amcor Ordinary Shares
Packaging of America
Stora Enso Oyj
Land O'Lakes
Australia and New Zealand Banking
Assured Guaranty Municipal Holdings
HSBC Capital
TransCanada Trust
Sumitomo Life Insurance
NatWest plc
UBS Funding (Switzerland)
Lloyds Banking
Koninklijke DSM
BlackRock Cash Funds Treasury SL Agency
Ingersoll-Rand Global Holdings Limited
Banco Bilbao Vizcaya Argentaria,
Automatic Data Processing,
Xcel Energy
Fortis
NiSource
Keurig Dr Pepper
Marvell Technology
JM Smucker
General Mills
GrowGeneration
Darling Ingredients
num = 0
for i in error:
print(num, i)
num += 1
0 A. P. Moller Maersk
1 MSCI
2 Goldman Sachs FS Treasury Intms Instl
3 Lowe's Companies
4 Petroleo Brasileiro Petrobras
5 Archer-Daniels Midland
6 Geberit
7 Advanced Drainage Systems
8 A.O. Smith
9 Bank of Montreal
10 Atlas Copco
11 Telefonaktiebolaget L M Ericsson
12 Amedisys
13 Ensign
14 Sabra Health Care REIT
15 Hong Kong Exchanges and Clearing
16 Novartis
17 Morrison (Wm) Supermarkets
18 Adevinta
19 Dj U.S. Semiconductors Index Swap Societe Generale
20 Power of Canada
21 Westpac Banking
22 Australia and New Zealand Banking
23 LVMH Moet Hennessy Louis Vuitton
24 Keyence
25 Grafton Shs
26 Smith (DS)
27 Rightmove
28 Enbridge Incorporation
29 Lloyds Banking plc
30 Global Atlantic Finance
31 Amcor Ordinary Shares
32 Packaging of America
33 Stora Enso Oyj
34 Land O'Lakes
35 Australia and New Zealand Banking
36 Assured Guaranty Municipal Holdings
37 HSBC Capital
38 TransCanada Trust
39 Sumitomo Life Insurance
40 NatWest plc
41 UBS Funding (Switzerland)
42 Lloyds Banking
43 Koninklijke DSM
44 BlackRock Cash Funds Treasury SL Agency
45 Ingersoll-Rand Global Holdings Limited
46 Banco Bilbao Vizcaya Argentaria,
47 Automatic Data Processing,
48 Xcel Energy
49 Fortis
50 NiSource
51 Keurig Dr Pepper
52 Marvell Technology
53 JM Smucker
54 General Mills
55 GrowGeneration
56 Darling Ingredients
4. Manually Re-name Companies
error[0] = 'A.P. Møller'
error[2] = 'Goldman Sachs'
error[3] = 'Lowe\'s'
error[4] = 'Petroleo Brasileiro'
error[5] = 'Archer'
error[8] = 'A. O. Smith'
error[11] = 'Telefonaktiebolaget'
error[15] = 'Hong Kong Exchanges'
error[17] = 'Morrison'
error[19] = 'Societe Generale'
error[20] = 'Power Corp. of Canada'
error[22] = 'Australia & New Zealand'
error[25] = 'Grafton'
error[26] = 'DS Smith'
error[28] = 'Enbridge'
error[29] = 'Lloyds Banking'
error[31] = 'Amcor'
error[32] = 'Packaging Corporation of America'
error[33] = 'Stora Enso'
error[35] = 'ASX:ANZ'
error[36] = 'Assured Guaranty'
error[37] = 'HSBC'
error[40] = 'NatWest'
error[41] = 'UBS Group'
error[44] = 'BlackRock'
error[45] = 'Ingersoll Rand',
error[46] = 'Banco Bilbao'
error[53] = 'The J. M. Smucker'
5. Another Loop for Remaining Companies
def sustainalytics(company):
driver.get(search_tool)
input_element = driver.find_element_by_css_selector('#searchInput')
input_element.send_keys(company)
time.sleep(3)
click_element = driver.find_element_by_css_selector('#searchResults > div > div')
click_element.click()
rating_num_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/span'
rating_text_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[1]/div[1]/div[2]/span'
industry_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/strong'
industry_rank_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[3]/div[2]/div[1]/div/p[2]/span'
company_site_xpath = '/html/body/section[2]/section[1]/div/div[1]/div[1]/div[1]/div/h2'
rating_num = driver.find_element_by_xpath(rating_num_xpath).text
rating_text = driver.find_element_by_xpath(rating_text_xpath).text
rating_text = re.sub('\n', ' ', rating_text)
industry = driver.find_element_by_xpath(industry_xpath).text
industry_rank = driver.find_element_by_xpath(industry_rank_xpath).text
industry_rank = re.sub(" out of ", "/", industry_rank)
company_site = driver.find_element_by_xpath(company_site_xpath).text
company_lst.append(company)
company_site_lst.append(company_site)
rating_num_lst.append(rating_num)
rating_text_lst.append(rating_text)
industry_lst.append(industry)
industry_rank_lst.append(industry_rank)
error2 = []
driver = webdriver.Chrome(r'C:\Users\esthe\chromedriver.exe')
search_tool = 'https://www.sustainalytics.com/esg-ratings/?utm_term=&utm_campaign=Leads-Search-20&utm_source=adwords&utm_medium=ppc&hsa_acc=4619360780&hsa_cam=11145778763&hsa_grp=108965194933&hsa_ad=514798435870&hsa_src=g&hsa_tgt=dsa-437115340933&hsa_kw=&hsa_mt=b&hsa_net=adwords&hsa_ver=3&gclid=CjwKCAjwjJmIBhA4EiwAQdCbxte5HszKf3XYqp6OXKKctLCPuuJMvdgh3IcVVHgKlauc2WKjO1p5YRoCl6UQAvD_BwE'
for comp in error:
try:
sustainalytics(comp)
except NoSuchElementException:
print(comp)
error2.append(comp)
continue
MSCI
Advanced Drainage Systems
Amedisys
Ensign
Sabra Health Care REIT
Global Atlantic Finance
Land O'Lakes
TransCanada Trust
Sumitomo Life Insurance
Automatic Data Processing,
GrowGeneration
Darling Ingredients
sustainalytics('Automatic Data Processing')
del error2[9]
data_final = {'company' : company_lst,
'company_site' : company_site_lst,
'rating_num' : rating_num_lst,
'rating_text' : rating_text_lst,
'industry' : industry_lst,
'industry_rank' : industry_rank_lst}
df = pd.DataFrame(data_final)
df
company | company_site | rating_num | rating_text | industry | industry_rank | |
---|---|---|---|---|---|---|
0 | Novo Nordisk | Novo Nordisk A/S | 21.9 | Medium Risk | Pharmaceuticals | 54/862 |
1 | DSV Panalpina | DSV Panalpina A/S | 17.0 | Low Risk | Transportation | 29/330 |
2 | Vestas Wind Systems | Vestas Wind Systems A/S | 14.8 | Low Risk | Electrical Equipment | 4/180 |
3 | Orsted | Ørsted A/S | 20.5 | Medium Risk | Utilities | 51/593 |
4 | Genmab | Genmab A/S | 27.2 | Medium Risk | Pharmaceuticals | 120/862 |
... | ... | ... | ... | ... | ... | ... |
286 | Keurig Dr Pepper | Keurig Dr Pepper, Inc. | 22.7 | Medium Risk | Food Products | 43/550 |
287 | Marvell Technology | Marvell Technology Group Ltd. | 20.1 | Medium Risk | Semiconductors | 35/252 |
288 | The J. M. Smucker | The J. M. Smucker Co. | 26.8 | Medium Risk | Food Products | 87/550 |
289 | General Mills | General Mills Inc | 21.4 | Medium Risk | Food Products | 32/550 |
290 | Automatic Data Processing | Automatic Data Processing Inc | 13.7 | Low Risk | Software & Services | 21/805 |
291 rows × 6 columns
df.loc[151]
company Icon
company_site Taiwan Semiconductor Manufacturing Co., Ltd.
rating_num 14.4
rating_text Low Risk
industry Semiconductors
industry_rank 4/252
Name: 151, dtype: object
6. Check for Searching Errors and Re-identify
df = df.drop(32) #Itron
df = df.drop(34) #The Toro
df = df.drop(44) #Investor
df = df.drop(58) #Terna
df = df.drop(67) #BP
df = df.drop(78) #Visa
df = df.drop(87) #AIA
df = df.drop(92) #MS&AD Insurance Holdings
df = df.drop(104) #B&M European Value Retail
df = df.drop(127) #CSL
df = df.drop(151) #Icon
df = df.drop(160) #IMI
df = df.drop(176) #Mondi
df = df.drop(192) #AT&T
df = df.drop(198) #T&D Holdings
df = df.drop(206) #UBS
last_try = ['The Toro Co', 'Investor AB', 'BP p.l.c.', 'Visa, Inc.',
'AIA Group', 'CSL Ltd.', 'IMI PLC', 'Mondi PLC', 'UBS Group']
error3 = []
driver = webdriver.Chrome(r'C:\Users\esthe\chromedriver.exe')
search_tool = 'https://www.sustainalytics.com/esg-ratings/?utm_term=&utm_campaign=Leads-Search-20&utm_source=adwords&utm_medium=ppc&hsa_acc=4619360780&hsa_cam=11145778763&hsa_grp=108965194933&hsa_ad=514798435870&hsa_src=g&hsa_tgt=dsa-437115340933&hsa_kw=&hsa_mt=b&hsa_net=adwords&hsa_ver=3&gclid=CjwKCAjwjJmIBhA4EiwAQdCbxte5HszKf3XYqp6OXKKctLCPuuJMvdgh3IcVVHgKlauc2WKjO1p5YRoCl6UQAvD_BwE'
for comp in last_try:
try:
sustainalytics(comp)
except NoSuchElementException:
print(comp)
error3.append(comp)
continue
7. Final Missing Compaies
final_missing_companies = error2 + ['Itron', 'Terna', 'MS&AD Insurance Holdings',
'B&M European Value Retail', 'Icon', 'AT&T', 'T&D Holdings']
final_missing_companies
['MSCI',
'Advanced Drainage Systems',
'Amedisys',
'Ensign',
'Sabra Health Care REIT',
'Global Atlantic Finance',
"Land O'Lakes",
'TransCanada Trust',
'Sumitomo Life Insurance',
'GrowGeneration',
'Darling Ingredients',
'Itron',
'Terna',
'MS&AD Insurance Holdings',
'B&M European Value Retail',
'Icon',
'AT&T',
'T&D Holdings']
8. Convert DataFrame to CSV
df.to_csv(r'C:\dataset\sustainalytics_esg_risk_rating.csv')
data_final2 = {'company' : company_lst,
'company_site' : company_site_lst,
'rating_num' : rating_num_lst,
'rating_text' : rating_text_lst,
'industry' : industry_lst,
'industry_rank' : industry_rank_lst}
df2 = pd.DataFrame(data_final2)
df2 = df2.drop(32) #Itron
d2f = df2.drop(34) #The Toro
df2 = df2.drop(44) #Investor
df2 = df2.drop(58) #Terna
df2 = df2.drop(67) #BP
df2 = df2.drop(78) #Visa
df2 = df2.drop(87) #AIA
df2 = df2.drop(92) #MS&AD Insurance Holdings
df2 = df2.drop(104) #B&M European Value Retail
df2 = df2.drop(127) #CSL
df2 = df2.drop(151) #Icon
df2 = df2.drop(160) #IMI
df2 = df2.drop(176) #Mondi
df2 = df2.drop(192) #AT&T
df2 = df2.drop(198) #T&D Holdings
df2 = df2.drop(206) #UBS
df2 = df2.reset_index()
df2
index | company | company_site | rating_num | rating_text | industry | industry_rank | |
---|---|---|---|---|---|---|---|
0 | 0 | Novo Nordisk | Novo Nordisk A/S | 21.9 | Medium Risk | Pharmaceuticals | 54/862 |
1 | 1 | DSV Panalpina | DSV Panalpina A/S | 17.0 | Low Risk | Transportation | 29/330 |
2 | 2 | Vestas Wind Systems | Vestas Wind Systems A/S | 14.8 | Low Risk | Electrical Equipment | 4/180 |
3 | 3 | Orsted | Ørsted A/S | 20.5 | Medium Risk | Utilities | 51/593 |
4 | 4 | Genmab | Genmab A/S | 27.2 | Medium Risk | Pharmaceuticals | 120/862 |
... | ... | ... | ... | ... | ... | ... | ... |
280 | 295 | AIA Group | AIA Group Ltd. | 12.9 | Low Risk | Insurance | 4/276 |
281 | 296 | CSL Ltd. | CSL Ltd. | 25.2 | Medium Risk | Pharmaceuticals | 89/862 |
282 | 297 | IMI PLC | IMI PLC | 26.6 | Medium Risk | Machinery | 69/463 |
283 | 298 | Mondi PLC | Mondi PLC | 11.8 | Low Risk | Paper & Forestry | 1/71 |
284 | 299 | UBS Group | UBS Group AG | 22.9 | Medium Risk | Diversified Financials | 161/712 |
285 rows × 7 columns
del df2['index']
df2
company | company_site | rating_num | rating_text | industry | industry_rank | |
---|---|---|---|---|---|---|
0 | Novo Nordisk | Novo Nordisk A/S | 21.9 | Medium Risk | Pharmaceuticals | 54/862 |
1 | DSV Panalpina | DSV Panalpina A/S | 17.0 | Low Risk | Transportation | 29/330 |
2 | Vestas Wind Systems | Vestas Wind Systems A/S | 14.8 | Low Risk | Electrical Equipment | 4/180 |
3 | Orsted | Ørsted A/S | 20.5 | Medium Risk | Utilities | 51/593 |
4 | Genmab | Genmab A/S | 27.2 | Medium Risk | Pharmaceuticals | 120/862 |
... | ... | ... | ... | ... | ... | ... |
280 | AIA Group | AIA Group Ltd. | 12.9 | Low Risk | Insurance | 4/276 |
281 | CSL Ltd. | CSL Ltd. | 25.2 | Medium Risk | Pharmaceuticals | 89/862 |
282 | IMI PLC | IMI PLC | 26.6 | Medium Risk | Machinery | 69/463 |
283 | Mondi PLC | Mondi PLC | 11.8 | Low Risk | Paper & Forestry | 1/71 |
284 | UBS Group | UBS Group AG | 22.9 | Medium Risk | Diversified Financials | 161/712 |
285 rows × 6 columns
df2.to_csv(r'C:\dataset\sustainalytics_esg_risk_rating.csv')
'Python Programming > Projects' 카테고리의 다른 글
WebPage Content Crawling & Preprocessing (0) | 2021.08.03 |
---|---|
기업의 ESG 요소와 수익률 간의 상관관계 분석(2) - Clustering Analysis (1) | 2021.06.02 |
기업의 ESG 요소와 수익률 간의 상관관계 분석(1) - Data Prep & Exploration (0) | 2021.06.02 |
국가 및 연도 별 총생산량(GDP)과 1인당 GDP에 대한 비교분석(2) - 시각화와 결과 (1) | 2021.02.18 |
COVID19 - Consumption Pattern Change in Itaewon (0) | 2021.02.14 |