Even if you are not currently on a recruiting term, you can use the jobs you applied to in previous terms to aggregate more data. After installing Selenium WebDriver, you can run the following script to scrape the company profiles of every company you have applied to in the past. If you wish, share the results on this Google Form.
import sys
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def is_percentage(input):
    """Return True if the second space-separated token of *input* looks like ``<int>%``.

    The value is split on single spaces; the second token, with its final
    character removed, must parse as an integer (for example ``": 45%"``).
    The final character itself is not inspected, so any one-character
    suffix is accepted, not only ``%``.

    Args:
        input: Text to inspect. Values that cannot be split as a string
            (``None``, bytes, numbers, ...) are treated as "not a percentage".

    Returns:
        bool: True when the check above succeeds, False otherwise.
    """
    try:
        tokens = input.split(' ')
        if len(tokens) < 2:
            return False
        # Drop the trailing character (expected to be '%') before parsing.
        int(tokens[1][:-1])
    except (AttributeError, TypeError, ValueError):
        # Only swallow the failures this check can legitimately produce;
        # KeyboardInterrupt / SystemExit must still propagate.
        return False
    return True
# Fallback credentials, used only when they are not supplied on the command
# line (``python script.py <user> <password>``).
user_name = "YOUR EMAILID"
password = "YOUR PASSWORD"

# Link text of the applications-table page whose rows are scraped.
page_number = '2'

# Maximum number of postings visited in one run.
limit = 100


def _log_in(driver, user, secret):
    """Open WaterlooWorks and submit the two-step (user name, then password) login."""
    driver.get("http://waterlooworks.uwaterloo.ca/")
    driver.find_element(By.LINK_TEXT, "Students/Alumni/Staff").click()
    driver.find_element(By.ID, "userNameInput").send_keys(user)
    driver.find_element(By.ID, "nextButton").click()
    driver.find_element(By.ID, "passwordInput").send_keys(secret)
    driver.find_element(By.ID, "submitButton").click()


def _open_all_applications(driver):
    """Navigate to the applications list and request every term's applications."""
    # NOTE(review): the 'action' tokens passed to orbisApp.buildForm are opaque
    # values copied from the site; presumably they can change or expire --
    # verify against the live page if navigation stops working.
    driver.execute_script("orbisApp.buildForm({'action':'_-_-B5i1dpSQA_oT3Cnw2tGjP1m_oMHwA3NuBZyIQMsDQeGeXmiab3z6-RMk04HAqmiirr2Tn0cudOqKvRq_SfN7sHQDLN0rTyRdhIMZ0XdMc29pBVdkaefNJcIfZ_7enhFBMAkT4DLr98tz6GdCYVib8ZLxzPoU61eqMOgSIUfvYweEhHQCelgjQIo1lIDqHQ'}, '', '').submit();")
    day_filter = driver.find_element(By.ID, "numOfDays")
    day_filter.click()
    for option in day_filter.find_elements(By.TAG_NAME, 'option'):
        if option.text == 'all':
            option.click()
            break
    driver.execute_script("orbisApp.buildForm({'action':'_-_-40MeaLNVvvWSDCAHVUuEGazAeWd47BpzSiSKKkxNqGmt4Oy5otxB9lwg7feIdfaLX0L0P5b4hUqcMw1eZILiVx0cj1J9iR6NCO8nEHVlJqa65xXxYJVjCu0_8W5JOL6xpF8Cj1f9LYvBnIXz5g1o2ILA58me9-o2yMsDC7GnMhdydZtiAm8VufvOTtzDgzk','numOfDays':'0','selectedTerm':''}, '/myAccount/co-op/coopApplications.htm', '').submit();")
    # Fixed wait for the applications table to load.
    time.sleep(15)


def _collect_posting_ids(driver, page):
    """Open results page *page* and return the text of the third cell of each row."""
    driver.find_element(By.LINK_TEXT, page).click()
    time.sleep(10)
    # The second <tbody> on the page is the one holding the application rows.
    table_body = driver.find_elements(By.TAG_NAME, 'tbody')[1]
    posting_ids = []
    for row in table_body.find_elements(By.TAG_NAME, "tr"):
        cell = row.find_elements(By.TAG_NAME, "td")[2]
        print('Debug: Retrieved posting id {}'.format(cell.text))
        posting_ids.append(cell.text)
    return posting_ids


def _scrape_posting(driver, posting_id):
    """Open one posting, print its company name, table values and chart percentages."""
    time.sleep(12)
    print('Debug: Processing posting {}'.format(posting_id))

    # First submission opens the posting; the second opens its follow-up view.
    # posting_id is page text spliced into JavaScript as-is.
    exec_stmt = "orbisApp.buildForm({action : '_-_-Qo0xCOoDKiFvKx8-s4i7-_dyFImCcpV21HiZaHiuKfi4sGCADCiU36YqTmctuoBpZAqJgCzKLkE87GxJEM6WRbID26oupnSE8n9QGIeIlmgYSv20iGGiySuea9ZiWz3LfJG9soWtM5h3N-bq4ic8LJWtUZdveZrZI87JAB38gQ', postingId: " + posting_id + "}, '/myAccount/co-op/coop-postings.htm', '').submit();"
    driver.execute_script(exec_stmt)
    time.sleep(7)
    exec_stmt = "orbisApp.buildForm({'action':'_-_-ugQvDI001i4Fg7nOJJ1qF9fbCiy7d84POt8Wxa29O5ZZPF8L4gd5lpYISaZRal8jNbibME0gSMfcjwVsJTv5RPJBrZ3mCMV3iDqufArnctF-gZRXzqt6vWZ-FI4tAiHBsREthuYVM8ekffVSFOCzoRDNc6wL8VgBWXjEt1w7MUe3H702479gGsOdk4pZ','postingId':'" + posting_id + "','accessToPostings':'','npfGroup':''}, '/myAccount/co-op/coop-postings.htm', '').submit();"
    driver.execute_script(exec_stmt)
    time.sleep(7)

    # Third 'span12' element carries the company line; its first word is dropped.
    company_name = driver.find_elements(By.CLASS_NAME, 'span12')[2]
    print('-----')
    print('{}'.format(' '.join(company_name.text.split(' ')[1:])))
    print('--')

    # Header cells (after the first two) of the second striped table.
    tables = driver.find_elements(By.CSS_SELECTOR, '.table.table-bordered.table-striped')
    header = tables[1].find_element(By.TAG_NAME, 'thead')
    header_cells = header.find_elements(By.TAG_NAME, 'th')[2:]
    print(len(header_cells))
    for cell in header_cells:
        print('{}'.format(cell.text))

    # Cells (after the first) of the first row of the third <tbody>.
    first_row = driver.find_elements(By.TAG_NAME, 'tbody')[2].find_element(By.TAG_NAME, 'tr')
    for cell in first_row.find_elements(By.TAG_NAME, 'td')[1:]:
        print('{}'.format(cell.text))
    print('--')

    # Get co-op records: <text> nodes made of exactly two <tspan>s, the second
    # of which passes is_percentage.
    for text in driver.find_elements(By.TAG_NAME, 'text'):
        tspans = text.find_elements(By.TAG_NAME, 'tspan')
        if len(tspans) == 2 and is_percentage(tspans[1].text):
            print(tspans[0].text)
            print(tspans[1].text.split(' ')[1])
    print('*****')

    # Two forms were submitted above, so step back twice in the history.
    driver.execute_script("window.history.go(-1)")
    driver.execute_script("window.history.go(-1)")


def main():
    """Log in, list past applications and print details for up to ``limit`` postings."""
    user, secret = user_name, password
    if len(sys.argv) >= 3:
        # Command-line credentials take precedence over the constants above.
        user, secret = sys.argv[1], sys.argv[2]

    driver = webdriver.Chrome()
    try:
        _log_in(driver, user, secret)
        _open_all_applications(driver)
        col_ids = _collect_posting_ids(driver, page_number)
        # Slice so that exactly `limit` postings (not limit - 1) are visited.
        for val in col_ids[:limit]:
            _scrape_posting(driver, val)
    finally:
        # Always shut the browser down, even if a lookup above fails.
        driver.quit()


if __name__ == "__main__":
    main()