Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sql and db driver script for updating review, course_offering and… #18

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
venv/
staticfiles
.env
.pyversion
.exportenv
.python-version
data

### Python ignores (https://github.com/github/gitignore/blob/master/Python.gitignore)
Expand Down
21 changes: 21 additions & 0 deletions apps/web/migrations/0022_review_course_offering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.4 on 2017-02-26 04:12
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Schema migration for the `web` app: gives the `review` model a
    # `course_offering` foreign key pointing at `web.CourseOffering`.

    # Must run after migration 0021 of the same app.
    dependencies = [
        ('web', '0021_allow_null_course_registration_number'),
    ]

    operations = [
        migrations.AddField(
            model_name='review',
            name='course_offering',
            # null=True: rows may have no offering assigned.
            # CASCADE: deleting the referenced offering deletes the review.
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to='web.CourseOffering',
            ),
        ),
    ]
1 change: 1 addition & 0 deletions apps/web/models/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Review(models.Model):
user = models.ForeignKey(User)

professor = models.CharField(max_length=255, db_index=True, blank=False)
course_offering = models.ForeignKey("CourseOffering", null=True)
term = models.CharField(max_length=3, db_index=True, blank=False)
comments = models.TextField(blank=False)

Expand Down
1 change: 1 addition & 0 deletions blob.json

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions layup_list/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,12 @@

DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
'ENGINE': 'django.db.backends.postgresql',
'NAME': 'layuplist',
'USER': 'lidatong',
'PASSWORD': os.environ['PASSWORD'],
'HOST': '127.0.0.1',
'PORT': '5432',
}
}
DATABASES['default'] = dj_database_url.config()
Expand Down
155 changes: 155 additions & 0 deletions scripts/add_course_offering_to_review.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from __future__ import division, print_function

import os
import json
import psycopg2

from collections import namedtuple
from psycopg2.extras import NamedTupleCursor


# Yields (r_id, i_id) pairs for reviews that match exactly one instructor.
#
# The `matches` CTE joins every review with a non-empty `professor` to every
# instructor whose `name` starts with a prefix of that `professor` text. The
# outer query counts the matches per review and keeps only the reviews with a
# single match (cnt = 1).
#
# NOTE(review): in PostgreSQL, SUBSTRING(professor, 0, length(professor))
# appears to yield `professor` minus its last character (positions are
# 1-based) -- presumably to drop a trailing '.' from values like "Jane A.";
# confirm this is intended. Any '%' or '_' inside `professor` would also be
# treated as LIKE wildcards.
SELECT_INSTRUCTORS = """
WITH matches AS (
SELECT web_review.id AS r_id,
professor,
web_instructor.name,
web_instructor.id AS i_id
FROM web_review
INNER JOIN web_instructor
ON name LIKE SUBSTRING(professor, 0, length(professor)) || '%'
WHERE professor != ''
)
SELECT matches.r_id, matches.i_id FROM (
SELECT matches.r_id,
professor,
COUNT(matches.r_id) AS cnt
FROM matches
GROUP BY matches.r_id, professor) AS uniq
INNER JOIN matches
ON matches.r_id = uniq.r_id
WHERE cnt = 1
"""

# Every review's id, course id (aliased to c_id) and term.
SELECT_REVIEWS = """
SELECT id, course_id AS c_id, term
FROM web_review;
"""

# All columns of every course offering.
SELECT_COURSE_OFFERINGS = """
SELECT *
FROM web_courseoffering;
"""

# Ids of the course offerings for one (course_id, term) whose period is the
# empty string. Named parameters: course_id, term.
SELECT_COURSE_OFFERING = """
SELECT id
FROM web_courseoffering
WHERE course_id = %(course_id)s AND term = %(term)s AND period = '';
"""

# Inserts one course offering, stamping created_at/updated_at with now(), and
# returns the new row's id. Named parameters: c_id, term, section, period.
INSERT_COURSE_OFFERING = """
INSERT INTO web_courseoffering (course_id, term, section, period, created_at, updated_at)
VALUES (%(c_id)s, %(term)s, %(section)s, %(period)s, now(), now())
RETURNING id;
"""


# Links an instructor to a course offering through the join table and returns
# the id of the new link row. Named parameters: co_id, i_id.
INSERT_CO_INSTRUCTORS = """
INSERT INTO web_courseoffering_instructors (courseoffering_id, instructor_id)
VALUES (%(co_id)s, %(i_id)s)
RETURNING id;
"""

# Points one review at a course offering and returns that review's id.
# Named parameters: co_id, review_id.
UPDATE_REVIEW = """
UPDATE web_review
SET course_offering_id = (%(co_id)s)
WHERE id = (%(review_id)s)
RETURNING id;
"""


def select(cur, query, params=None):
    """Execute `query` with `params` on `cur` and hand the cursor back.

    The cursor itself is returned so the caller decides how to consume the
    result (iterate over it, or call `fetchone()` / `fetchall()`).
    """
    cur.execute(query, params)
    return cur


def insert(cur, query, params=None):
    """Execute an insert `query` and return the first column of its result row.

    The query is expected to produce a row (e.g. via `RETURNING id`); the
    first column of that row is returned, i.e. the id of the row just
    inserted.
    """
    cur.execute(query, params)
    inserted_row = cur.fetchone()
    return inserted_row[0]


def update(cur, query, params=None):
    """Execute an update `query` and return the first column of its result row.

    The query is expected to produce a row (e.g. via `RETURNING id`); the
    first column of that row is returned, i.e. the id of the row that was
    updated.
    """
    cur.execute(query, params)
    updated_row = cur.fetchone()
    return updated_row[0]


def create_review_to_instructor_mapping(cur):
    """Build a dict of review id -> instructor id from SELECT_INSTRUCTORS.

    Only reviews whose `professor` field matches exactly one instructor name
    are included.

    *Important note*: the result is not guaranteed to be accurate!

    Example of a wrong match: Prof. Jane Austen taught ENGL 1 in 2001 but has
    since left Dartmouth. Prof. Jane Albrecht joined in 2005 and teaches
    HIST 1. A review that only says "Jane A." is then matched to Jane
    Albrecht as the professor who taught ENGL 1 in 2001.

    This mostly affects old reviews, where all we have is
    [First Name] + [Last Initial]. Checking departments could reduce such
    mismatches, but did not seem worth the effort.
    """
    mapping = {}
    for match in select(cur, SELECT_INSTRUCTORS):
        mapping[match.r_id] = match.i_id
    return mapping


def main():
    """Link reviews to course offerings and record which rows were touched.

    Only reviews whose `professor` text uniquely matches an instructor (see
    `create_review_to_instructor_mapping`) are processed. For each of them:

    * if no course offering exists for the review's (course, term), a
      placeholder offering (section 1, empty period) is inserted and the
      matched instructor is attached to it;
    * otherwise the existing offering with an empty period is looked up; if
      there is none, the review is skipped and left unlinked;
    * the review's `course_offering_id` is set to that offering.

    All writes run in a single transaction that is committed only after every
    review has been processed; the connection is always closed, which
    discards uncommitted work if anything failed. The ids of the inserted and
    updated rows are then written to `blob.json`.

    The connection string is read from the `DATABASE_URL` environment
    variable (raises `KeyError` if it is not set).
    """
    new_course_offering_ids = []
    new_course_offering_to_instructor_ids = []
    updated_review_ids = []

    conn = psycopg2.connect(os.environ["DATABASE_URL"])
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
        review_to_instructor_mapping = create_review_to_instructor_mapping(cur)

        # (course_id, term) of every existing offering, whatever its period.
        cos = {(row.course_id, row.term)
               for row in select(cur, SELECT_COURSE_OFFERINGS)}

        # Per review feedback: filter up front rather than `continue`-ing
        # inside the loop for reviews without a unique instructor match.
        reviews = [review for review in select(cur, SELECT_REVIEWS).fetchall()
                   if review.id in review_to_instructor_mapping]

        for review in reviews:
            print("*********")
            print("[review id]", review.id)
            if (review.c_id, review.term) not in cos:
                print("[adding new row to course offering]", review.c_id, review.term)
                params = {"c_id": review.c_id, "term": review.term, "section": 1, "period": ""}
                new_co_id = insert(cur, INSERT_COURSE_OFFERING, params)
                print("[new_co_id]", new_co_id)
                new_course_offering_ids.append(new_co_id)
                cos.add((review.c_id, review.term))

                instructor_id = review_to_instructor_mapping[review.id]
                print("[adding new row to co_to_instructor]", instructor_id)
                params = {"co_id": new_co_id, "i_id": instructor_id}
                new_co_to_i_id = insert(cur, INSERT_CO_INSTRUCTORS, params)
                print("[new_co_to_i_id]", new_co_to_i_id)
                new_course_offering_to_instructor_ids.append(new_co_to_i_id)

                co_id = new_co_id
            else:
                params = {"course_id": review.c_id, "term": review.term}
                existing = select(cur, SELECT_COURSE_OFFERING, params).fetchone()
                if existing is None:
                    # `cos` covers offerings of any period, but the lookup
                    # only finds rows with period = ''. Leave the review
                    # unlinked instead of crashing on `None.id`.
                    print("[no course offering with empty period, skipping review]",
                          review.id)
                    continue
                co_id = existing.id

            params = {"co_id": co_id, "review_id": review.id}
            print("[updating review's course offering]", review.id, co_id)
            updated_review_id = update(cur, UPDATE_REVIEW, params)
            updated_review_ids.append(updated_review_id)

        # psycopg2 runs statements inside an implicit transaction; without an
        # explicit commit every insert/update above would be thrown away.
        conn.commit()
    finally:
        # Closing without a commit discards pending changes, so a failure
        # part-way through leaves the database untouched.
        conn.close()

    blob = {"new_course_offering_ids": new_course_offering_ids,
            "new_co_to_i_id": new_course_offering_to_instructor_ids,
            "updated_review_ids": updated_review_ids}

    # TODO (review feedback): need better logging than a JSON dump of ids.
    with open("blob.json", "w") as blob_file:
        json.dump(blob, blob_file)


# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()