Skip to content

Commit

Permalink
wikidata_id
Browse files Browse the repository at this point in the history
This is step one in moving from Freebase to Wikidata. I added wikidata_id
to the Employer, Location and Subject models. Then I added a migration to
look up the existing entities in Wikidata using Wikidata's SPARQL endpoint.
The matching logic thus far is:

1. Look up entity using the Freebase ID
2. Use the name of the entity to derive the Wikipedia URL and look that up
3. Search for the entity by its label

The next step is to purge entities that don't have Wikidata IDs, and then
to create new suggest functionality that uses Wikidata instead of Freebase.

refs #38
refs #57
  • Loading branch information
edsu committed Sep 22, 2016
1 parent dc0584e commit ffb9d7d
Show file tree
Hide file tree
Showing 3 changed files with 407 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models


class Migration(SchemaMigration):
    """Add a nullable ``wikidata_id`` column to the subject, employer and location tables."""

    def forwards(self, orm):
        """Create the ``wikidata_id`` column on each of the three jobs tables."""
        # Same order as the generated migration: Subject, Employer, Location.
        for table_name in (u'jobs_subject', u'jobs_employer', u'jobs_location'):
            # Build a fresh field instance per table rather than sharing one.
            wikidata_field = self.gf('django.db.models.fields.CharField')(
                max_length=100, null=True)
            db.add_column(table_name, 'wikidata_id', wikidata_field,
                          keep_default=False)

    def backwards(self, orm):
        """Drop the ``wikidata_id`` column from each of the three jobs tables."""
        for table_name in (u'jobs_subject', u'jobs_employer', u'jobs_location'):
            db.delete_column(table_name, 'wikidata_id')

    # Frozen model definitions for this migration; relations refer to other
    # entries through orm['app.Model'] strings.
    models = {
        u'auth.group': {
            'Meta': {'object_name': 'Group'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
        },
        u'auth.permission': {
            'Meta': {'ordering': "(u'content_type__app_label', u'content_type__model', u'codename')", 'unique_together': "((u'content_type', u'codename'),)", 'object_name': 'Permission'},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['contenttypes.ContentType']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        u'auth.user': {
            'Meta': {'object_name': 'User'},
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
        },
        u'contenttypes.contenttype': {
            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        u'jobs.employer': {
            'Meta': {'object_name': 'Employer'},
            'address': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'city': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'country': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'description': ('django.db.models.fields.TextField', [], {'null': 'True'}),
            'domain': ('django.db.models.fields.CharField', [], {'max_length': '50'}),
            'freebase_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'null': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'postal_code': ('django.db.models.fields.CharField', [], {'max_length': '25'}),
            'slug': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True'}),
            'state': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'wikidata_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'null': 'True'})
        },
        u'jobs.job': {
            'Meta': {'ordering': "['-post_date']", 'object_name': 'Job'},
            'close_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}),
            'contact_email': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'contact_name': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'creator': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'jobs'", 'null': 'True', 'to': u"orm['auth.User']"}),
            'deleted': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}),
            'description': ('django.db.models.fields.TextField', [], {}),
            'email_message_id': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True'}),
            'employer': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'jobs'", 'null': 'True', 'to': u"orm['jobs.Employer']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'job_type': ('django.db.models.fields.CharField', [], {'default': "'ft'", 'max_length': '2'}),
            'location': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'jobs'", 'null': 'True', 'to': u"orm['jobs.Location']"}),
            'origin_url': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True'}),
            'page_views': ('django.db.models.fields.IntegerField', [], {'null': 'True'}),
            'post_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}),
            'published': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}),
            'published_by': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'published_jobs'", 'null': 'True', 'to': u"orm['auth.User']"}),
            'salary_end': ('django.db.models.fields.IntegerField', [], {'null': 'True'}),
            'salary_start': ('django.db.models.fields.IntegerField', [], {'null': 'True'}),
            'telecommute': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'title': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'tweet_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}),
            'updated': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'url': ('django.db.models.fields.CharField', [], {'max_length': '1024'})
        },
        u'jobs.jobedit': {
            'Meta': {'ordering': "['-created']", 'object_name': 'JobEdit'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'job': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'edits'", 'to': u"orm['jobs.Job']"}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'edits'", 'to': u"orm['auth.User']"})
        },
        u'jobs.keyword': {
            'Meta': {'object_name': 'Keyword'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'ignore': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'subject': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'keywords'", 'null': 'True', 'to': u"orm['jobs.Subject']"})
        },
        u'jobs.location': {
            'Meta': {'object_name': 'Location'},
            'freebase_id': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'latitude': ('django.db.models.fields.FloatField', [], {'null': 'True'}),
            'longitude': ('django.db.models.fields.FloatField', [], {'null': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'wikidata_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'null': 'True'})
        },
        u'jobs.subject': {
            'Meta': {'ordering': "['name']", 'object_name': 'Subject'},
            'description': ('django.db.models.fields.TextField', [], {'null': 'True'}),
            'freebase_id': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'freebase_type_id': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'jobs': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "'subjects'", 'null': 'True', 'to': u"orm['jobs.Job']"}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '500'}),
            'slug': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '100'}),
            'type': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'wikidata_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'null': 'True'})
        },
        u'jobs.userprofile': {
            'Meta': {'object_name': 'UserProfile'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'facebook_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'blank': 'True'}),
            'github_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'blank': 'True'}),
            'home_url': ('django.db.models.fields.URLField', [], {'max_length': '200', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'linkedin_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'blank': 'True'}),
            'pic_url': ('django.db.models.fields.URLField', [], {'max_length': '200', 'blank': 'True'}),
            'twitter_id': ('django.db.models.fields.CharField', [], {'max_length': '100', 'blank': 'True'}),
            'user': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'profile'", 'unique': 'True', 'to': u"orm['auth.User']"})
        }
    }

    complete_apps = ['jobs']
Loading

0 comments on commit ffb9d7d

Please sign in to comment.