Interested in contributing data?

Even if you are not currently in a recruiting term, you can still contribute by using the jobs you have applied to in the past. After installing Selenium WebDriver, you can run the following script to scrape the company profiles of every company you have historically applied to. If you wish, share the results on this Google Form.

import sys
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def is_percentage(input):
    """Return True if the second token of *input* looks like ``<integer>%``.

    *input* is split on single spaces. The second token has its final
    character dropped (the ``%`` sign in the strings this script feeds in)
    and the remainder must parse with ``int()``. For example,
    ``"Hired 45%"`` gives True; ``"45%"`` and ``"Hired n/a"`` give False.

    The dropped character is not checked, so ``"x 55"`` is also accepted
    (``"5"`` parses as an integer). Anything that cannot be split or parsed,
    including non-string input, yields False.

    The parameter name shadows the ``input`` builtin; it is kept so existing
    callers are unaffected.
    """
    try:
        tokens = input.split(' ')
        if len(tokens) < 2:
            return False
        # Strip the trailing character and require the rest to be an integer.
        int(tokens[1][:-1])
    except (AttributeError, TypeError, ValueError):
        # Narrow on purpose: a bare ``except`` would also swallow
        # KeyboardInterrupt / SystemExit.
        return False
    return True
# Credentials: command-line arguments take precedence
# (``python wwscraper.py <user> <password>``); the placeholders below are only
# used when the script is run without arguments.
user_name = sys.argv[1] if len(sys.argv) > 1 else "YOUR EMAILID"
password = sys.argv[2] if len(sys.argv) > 2 else "YOUR PASSWORD"

driver = webdriver.Chrome()
try:
    # ---- Log in --------------------------------------------------------
    driver.get("http://waterlooworks.uwaterloo.ca/")
    driver.find_element(By.LINK_TEXT, "Students/Alumni/Staff").click()
    driver.find_element(By.ID, "userNameInput").send_keys(user_name)
    driver.find_element(By.ID, "nextButton").click()
    driver.find_element(By.ID, "passwordInput").send_keys(password)
    driver.find_element(By.ID, "submitButton").click()

    # ---- Open the applications list and ask for every term --------------
    # NOTE(review): the opaque 'action' tokens passed to orbisApp.buildForm
    # are copied verbatim; they may be tied to a site version or session --
    # confirm they are still valid before relying on this script.
    driver.execute_script("orbisApp.buildForm({'action':'_-_-B5i1dpSQA_oT3Cnw2tGjP1m_oMHwA3NuBZyIQMsDQeGeXmiab3z6-RMk04HAqmiirr2Tn0cudOqKvRq_SfN7sHQDLN0rTyRdhIMZ0XdMc29pBVdkaefNJcIfZ_7enhFBMAkT4DLr98tz6GdCYVib8ZLxzPoU61eqMOgSIUfvYweEhHQCelgjQIo1lIDqHQ'}, '', '').submit();")
    days_select = driver.find_element(By.ID, "numOfDays")
    days_select.click()
    for option in days_select.find_elements(By.TAG_NAME, 'option'):
        if option.text == 'all':
            option.click()  # select() in earlier versions of webdriver
            break
    driver.execute_script("orbisApp.buildForm({'action':'_-_-40MeaLNVvvWSDCAHVUuEGazAeWd47BpzSiSKKkxNqGmt4Oy5otxB9lwg7feIdfaLX0L0P5b4hUqcMw1eZILiVx0cj1J9iR6NCO8nEHVlJqa65xXxYJVjCu0_8W5JOL6xpF8Cj1f9LYvBnIXz5g1o2ILA58me9-o2yMsDC7GnMhdydZtiAm8VufvOTtzDgzk','numOfDays':'0','selectedTerm':''}, '/myAccount/co-op/coopApplications.htm', '').submit();")
    time.sleep(15)  # fixed wait for the applications page to load

    # ---- Collect posting ids from one page of the applications table ----
    # Only the page whose pager link text equals page_number is scraped;
    # change the value and re-run to cover other pages.
    page_number = '2'
    driver.find_element(By.LINK_TEXT, page_number).click()
    time.sleep(10)

    # The second <tbody> on the page is read as the applications table and
    # the third cell of each row as the posting id.
    tbody = driver.find_elements(By.TAG_NAME, 'tbody')[1]
    col_ids = []
    for row in tbody.find_elements(By.TAG_NAME, "tr"):
        col = row.find_elements(By.TAG_NAME, "td")[2]
        print('Debug: Retrieved posting id {}'.format(col.text))
        col_ids.append(col.text)

    # ---- Visit each posting and print its statistics --------------------
    # Process at most `limit` postings. (The previous counter-based check
    # broke out one iteration early and handled only limit - 1 of them.)
    limit = 100
    for val in col_ids[:limit]:
        time.sleep(12)
        print('Debug: Processing posting {}'.format(val))

        # Two form submissions per posting, each followed by a fixed wait.
        exec_stmt = "orbisApp.buildForm({action : '_-_-Qo0xCOoDKiFvKx8-s4i7-_dyFImCcpV21HiZaHiuKfi4sGCADCiU36YqTmctuoBpZAqJgCzKLkE87GxJEM6WRbID26oupnSE8n9QGIeIlmgYSv20iGGiySuea9ZiWz3LfJG9soWtM5h3N-bq4ic8LJWtUZdveZrZI87JAB38gQ', postingId: " + val + "}, '/myAccount/co-op/coop-postings.htm', '').submit();"
        driver.execute_script(exec_stmt)
        time.sleep(7)
        exec_stmt = "orbisApp.buildForm({'action':'_-_-ugQvDI001i4Fg7nOJJ1qF9fbCiy7d84POt8Wxa29O5ZZPF8L4gd5lpYISaZRal8jNbibME0gSMfcjwVsJTv5RPJBrZ3mCMV3iDqufArnctF-gZRXzqt6vWZ-FI4tAiHBsREthuYVM8ekffVSFOCzoRDNc6wL8VgBWXjEt1w7MUe3H702479gGsOdk4pZ','postingId':'" + val + "','accessToPostings':'','npfGroup':''}, '/myAccount/co-op/coop-postings.htm', '').submit();"
        driver.execute_script(exec_stmt)
        time.sleep(7)

        # Company name: text of the third 'span12' element minus its first
        # space-separated word.
        company_name = driver.find_elements(By.CLASS_NAME, 'span12')[2]
        print('-----')
        print(' '.join(company_name.text.split(' ')[1:]))
        print('--')

        # Header cells (from the third onward) of the second striped table.
        table = driver.find_elements(By.CSS_SELECTOR, '.table.table-bordered.table-striped')
        table_header = table[1].find_element(By.TAG_NAME, 'thead')
        table_header_entries = table_header.find_elements(By.TAG_NAME, 'th')[2:]
        print(len(table_header_entries))
        for e in table_header_entries:
            print(e.text)

        # Cells (from the second onward) of the first row of the third <tbody>.
        table_body = driver.find_elements(By.TAG_NAME, 'tbody')[2].find_element(By.TAG_NAME, 'tr')
        for e in table_body.find_elements(By.TAG_NAME, 'td')[1:]:
            print(e.text)
        print('--')

        # Get co-op records: <text> elements holding exactly two <tspan>
        # children, where the second one passes is_percentage().
        for text in driver.find_elements(By.TAG_NAME, 'text'):
            tspans = text.find_elements(By.TAG_NAME, 'tspan')
            if len(tspans) == 2 and is_percentage(tspans[1].text):
                print(tspans[0].text)
                print(tspans[1].text.split(' ')[1])
        print('*****')

        # Step back through both form submissions before the next posting.
        driver.execute_script("window.history.go(-1)")
        driver.execute_script("window.history.go(-1)")
finally:
    # Always shut down the browser and the chromedriver process, even when
    # a lookup above fails.
    driver.quit()
view raw wwscraper.py hosted with ❤ by GitHub