Skip to content

Commit

Permalink
Merge pull request #21 from wmde/ores-to-liftwing
Browse files Browse the repository at this point in the history
Implement score calculation using Liftwing instead of ORES. This required multiple changes to the ArticleQualityService as Liftwing calls are built differently.

Bug: T343731
  • Loading branch information
itamargiv authored Oct 19, 2023
2 parents dfbe292 + 0984bde commit 9887233
Show file tree
Hide file tree
Showing 11 changed files with 7,187 additions and 3,694 deletions.
3 changes: 2 additions & 1 deletion .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ module.exports = {
},
rules: {
"no-console": process.env.NODE_ENV === "production" ? "warn" : "off",
"no-debugger": process.env.NODE_ENV === "production" ? "warn" : "off"
"no-debugger": process.env.NODE_ENV === "production" ? "warn" : "off",
"@typescript-eslint/ban-ts-ignore": "warn"
},
overrides: [
{
Expand Down
10,555 changes: 7,045 additions & 3,510 deletions package-lock.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"vue-property-decorator": "^9.1.2",
"vue-router": "^3.2.0",
"vuex": "^3.4.0",
"wikidata-query-gui": "git+https://github.com/wikimedia/wikidata-query-gui.git"
"wikidata-query-gui": "git+https://github.com/wikimedia/wikidata-query-gui.git#a12539cf0c10f55964366ec9054dd27f9d9223a5"
},
"devDependencies": {
"@babel/polyfill": "^7.11.5",
Expand All @@ -49,7 +49,7 @@
"eslint": "^6.7.2",
"eslint-plugin-prettier": "^3.1.3",
"eslint-plugin-vue": "^6.2.2",
"fetch-mock": "^9.11.0",
"jest-fetch-mock": "^3.0.3",
"lint-staged": "^9.5.0",
"mutationobserver-shim": "^0.3.7",
"node-sass": "^4.12.0",
Expand Down
100 changes: 53 additions & 47 deletions src/ArticleQualityService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,20 @@ import asyncPool from "tiny-async-pool";
import {
WikidataResponseParsed,
WikidataResponseRaw,
OresScoresResponse,
OresScore,
QualityScoresResponse,
QualityScore,
Result
} from "./ArticleQualityService.types";

export const ERROR_CODES = {
WIKIDATA_GET: 200,
WIKIDATA_PARSE: 300,
ORES: 400
LIFTWING: 400
};
class ArticleQualityService {
private batchSize = 50;
private maxWorkers = 2;

private oresHost = "https://ores.wikimedia.org";
private weights: { [key: string]: number } = {
E: 1,
D: 2,
Expand Down Expand Up @@ -61,24 +60,26 @@ class ArticleQualityService {
input => this.getLatestRevisions(input)
);

// Get the ORES scores for the revisions
const batchScores = await asyncPool(
// Get the Quality scores for the revisions
const batchResponses = await asyncPool(
this.maxWorkers,
batchRevisions,
input => this.getOresScores(input)
input => this.provideScores(input)
);

// Assign the scores to the revisions
const articleQuality: {
[key: string]: Result;
} = Object.assign({}, ...batchRevisions.flat());
batchScores.map(scores => {
for (const [key, value] of Object.entries(scores || {})) {
articleQuality[key].score = this.computeWeightedSum(
value[this.modelName].score
);
articleQuality[key].probability =
value[this.modelName].score.probability;
batchResponses.forEach(responses => {
for (const response of responses as QualityScoresResponse[]) {
for (const [revId, content] of Object.entries(response || {})) {
articleQuality[revId].score = this.computeWeightedSum(
content[this.modelName].score
);
articleQuality[revId].probability =
content[this.modelName].score.probability;
}
}
});

Expand All @@ -95,7 +96,7 @@ class ArticleQualityService {
}

/**
* Prepares the response from the Wikidata API to send to ORES
* Prepares the response from the Wikidata API to send to Liftwing
*
* @param response Raw JSON response from Wikidata API
*/
Expand Down Expand Up @@ -154,33 +155,38 @@ class ArticleQualityService {
}
}

/**
* Gets the ORES scores for a list of revisions
*
* @param revisions Response from the Wikidata API parsed through parseWikidataResponse()
*/
public async getOresScores(
revisions: WikidataResponseParsed
): Promise<OresScoresResponse | void> {
const validRevisions = Object.values(revisions).filter(rev => !rev.missing);

const queryUrl = `${this.oresHost}/v3/scores/${this.dbname}?models=${
this.modelName
}&revids=${validRevisions.map(rev => rev.revid).join("|")}`;
try {
return await fetch(queryUrl)
.then(data => data.json())
.then(
response => response[this.dbname] && response[this.dbname].scores
);
} catch (e) {
throw {
code: ERROR_CODES.ORES,
description:
"There was a problem connecting to the ORES service. Please check your internet connection or try again later",
message: e.message
};
}
/**
 * Fetches the quality scores for every existing revision of a batch
 *
 * One request per revision is started and all of them run concurrently;
 * the returned promise rejects as soon as any single request fails.
 *
 * @param batchRevisions Response from the Wikidata API parsed through parseWikidataResponse()
 * @returns One scores response per non-missing revision, in the same order as the extracted revisions
 */
private async provideScores(
  batchRevisions: WikidataResponseParsed
): Promise<QualityScoresResponse[]> {
  // Revision IDs as numbers; revisions flagged as missing are skipped
  const revisions = this.extractRevisions(batchRevisions);

  // Promise.all always resolves to an array, so the result type needs no `void` branch
  return Promise.all(
    revisions.map(revision => this.getQualityScores(revision))
  );
}

/**
 * Collects the numeric revision IDs of all entries that are not flagged as missing
 *
 * @param response Parsed Wikidata response whose values carry `missing` and `revid`
 * @returns The revision IDs converted with Number()
 */
private extractRevisions(response: WikidataResponseParsed): Array<number> {
  const revisionIds: Array<number> = [];
  for (const entry of Object.values(response)) {
    if (entry.missing) {
      continue;
    }
    revisionIds.push(Number(entry.revid));
  }
  return revisionIds;
}

/**
 * Requests the item quality scores of a single revision from LiftWing
 *
 * @param revision ID of the revision to score
 * @returns The content of `wikidatawiki.scores` from the response, or an
 * empty object when the response carries no scores
 * @throws An object with `code` set to ERROR_CODES.LIFTWING plus a
 * `description` and `message` when the request fails, the service answers
 * with a non-2xx status, or the body is not valid JSON
 */
private async getQualityScores(
  revision: number
): Promise<QualityScoresResponse> {
  try {
    const response = await fetch(
      "https://api.wikimedia.org/service/lw/inference/v1/models/wikidatawiki-itemquality:predict",
      {
        method: "POST",
        // The API expects snake_case; disable the rule for this line only
        // instead of for the remainder of the file
        // eslint-disable-next-line @typescript-eslint/camelcase
        body: JSON.stringify({ rev_id: revision })
      }
    );

    // fetch() only rejects on network failures, so HTTP errors must be checked explicitly
    if (!response.ok) {
      throw new Error(`LiftWing responded with HTTP status ${response.status}`);
    }

    const data = await response.json();

    // Guard the nested access so an unexpected payload yields no scores rather than a TypeError
    return (data && data.wikidatawiki && data.wikidatawiki.scores) || {};
  } catch (e) {
    // Report failures with the dedicated error code so callers can tell them apart
    throw {
      code: ERROR_CODES.LIFTWING,
      description:
        "There was a problem connecting to the LiftWing service. Please check your internet connection or try again later",
      message: e instanceof Error ? e.message : String(e)
    };
  }
}

/**
Expand All @@ -202,14 +208,14 @@ class ArticleQualityService {
}

/**
* Computes the weighted sum of an ORES score object
* Computes the weighted sum of a LiftWing score object
*
* The ORES score is calculated by weight of the most relevant score.
* See ORES on https://www.wikidata.org/wiki/Wikidata:Item_quality#ORES
* The LiftWing score is calculated by weight of the most relevant score.
* See LiftWing on https://wikitech.wikimedia.org/wiki/Machine_Learning/LiftWing
*
* @param score An ORES score object
* @param score A quality score object
*/
private computeWeightedSum(score: OresScore) {
private computeWeightedSum(score: QualityScore) {
const clsProba = score.probability;
let weightedSum = 0;
for (const cls in clsProba) {
Expand Down
14 changes: 9 additions & 5 deletions src/ArticleQualityService.types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,19 @@ export interface Result extends WikidataRevision {
};
}

export interface OresScoresResponse {
itemquality: {
score: OresScore;
};
export interface QualityScoresResponse {
[key: string]: ModelScores;
}

export interface OresScore {
export interface QualityScore {
prediction: string;
probability: {
[key: string]: number;
};
}

/**
 * Scores of a single revision, keyed by model name
 *
 * Each entry wraps the QualityScore that the model produced.
 */
export interface ModelScores {
[key: string]: {
score: QualityScore;
};
}
2 changes: 1 addition & 1 deletion src/components/CSVGenerator.vue
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export default Vue.extend({
const results = this.results.map(result => ({
"Item label": result.label,
"Item ID": result.title,
"Weighted ORES Score": result.score.toFixed(2)
"Weighted Quality Score": result.score.toFixed(2)
}));
const csv = Papa.unparse(results);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<p>
<strong>How is the score calculated</strong>
<br />
We trained the ORES algorithm by giving it Items that were scored to
We trained the quality algorithm by giving it Items that were scored to
belong in one of 5 quality categories. Now, if the algorithm is given a new
Item, it estimates the likelihood that a given Item falls in each of
the 5 possible quality categories. To get one score for an Item, we
Expand All @@ -54,13 +54,11 @@
>GitHub</a
>.
<br />
You can read more about
<a href="https://www.mediawiki.org/wiki/ORES" target="_blank">ORES</a>
in Wikidata in
You can read more about quality score calculation in Wikidata
<a
href="https://blog.wikimedia.de/2016/01/02/teaching-machines-to-make-your-life-easier-quality-work-on-wikidata/"
href="https://wikitech.wikimedia.org/wiki/Machine_Learning/LiftWing"
target="_blank"
>this blog post</a
>here</a
>.
</p></b-modal
>
Expand Down
6 changes: 3 additions & 3 deletions src/views/ItemInput.vue
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Gets the quality scores of Wikidata Items and calculates their average
value.
<br />
<ORESExplanation />
<ScoreExplanation />
</p>
<div class="mt-4">
<h2 class="mb-4">
Expand All @@ -29,15 +29,15 @@
import { Vue } from "vue-property-decorator";
import ItemIdentifierList from "@/components/ItemIdentifierList.vue"; // @ is an alias to /src
import SPARQLEditor from "@/components/SPARQLEditor.vue";
import ORESExplanation from "@/components/ORESExplanation.vue";
import ScoreExplanation from "@/components/ScoreExplanation.vue";
const ACTIVE_TAB_KEY = "wikidata.itemQuality.ui.activeItemsInputTab";
export default Vue.extend({
components: {
ItemIdentifierList,
SPARQLEditor,
ORESExplanation
ScoreExplanation
},
data() {
return {
Expand Down
6 changes: 0 additions & 6 deletions src/views/Results.vue
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,6 @@
</b-col>
</b-row>
<ResultsTable />
<p>
The quality scores are created by ORES, a machine learning tool.
<a href="https://ores.wikimedia.org/" target="_blank"
>Learn more about how it works</a
>
</p>
</div>
</template>
<style lang="scss" scoped>
Expand Down
Loading

0 comments on commit 9887233

Please sign in to comment.