
Merge pull request #14 from ShabadOS/bani-fix
MINOR release
Harjot1Singh authored Jan 4, 2018
2 parents 5144eff + e62bced commit 1b56a19
Showing 3,680 changed files with 95,801 additions and 99 deletions.
Note: this diff is too large to display in full; only the first 3,000 changed files are loaded.
63 changes: 19 additions & 44 deletions lib/extract.js
@@ -22,12 +22,28 @@ const main = async () => {
// Write these all to JSON files
await Promise.all( [
[ sources, `${OUTPUT_DIR}/sources.json` ],
[ banis, `${OUTPUT_DIR}/banis.json` ],
[ writers, `${OUTPUT_DIR}/writers.json` ],
[ raags, `${OUTPUT_DIR}/raags.json` ],
[ lineTypes, `${OUTPUT_DIR}/line_types.json` ],
].map( ( [ data, path ] ) => writeJSON( path, data ) ) )

const banisDir = `${OUTPUT_DIR}/banis`
createDir( banisDir )

// Generate the files for each of the banis, with ranges
banis.forEach( async name => {
const id = banis.indexOf( name ) + 1
const path = `${banisDir}/${name}.json`

const lines = ( await knex( 'bani_lines' )
.min( 'line_id as start_line' )
.max( 'line_id as end_line' )
.where( 'bani_id', id )
.groupBy( 'line_group' ) )

await writeJSON( path, lines )
console.log( `Bani ${name} written to ${path}` )
} )

console.log( `Loaded sources: ${sources}` )
sources.forEach( async source => {
@@ -64,8 +80,6 @@ const main = async () => {

console.log( `Source ${source} has ${count} pages` )

// Used to keep track of banis across angs
const currentBanis = new Set()
// Create directories in 100s up to count
for ( let angBatch = 1; angBatch <= count; angBatch += 100 ) {
const numberDir = `${sourcesDir}/${angBatch}`
@@ -91,53 +105,14 @@
.andWhere( 'source_id', sources.indexOf( source ) + 1 )
.orderBy( 'lines.id' )

const baniLines = await knex( 'bani_lines' )
.select()
.whereBetween( 'line_id', [ lines[ 0 ].id, lines[ lines.length - 1 ].id + 1 ] ) // Get one extra line
// Bucket into groups of line_ids
.reduce( ( lines, { line_id, bani_id: baniId } ) => ( {
...lines,
[ line_id ]: [ ...( lines[ line_id ] || [] ), baniId ],
} ), {} )


// Transform each line into groups of angs with bani ranges attached
// Transform each line into groups of angs
const angs = lines
.map( line => {
const { id } = line
// A list of banis for the current line and the next line
const banis = baniLines[ id ] || []
const nextBaniLine = baniLines[ id + 1 ] || []

// The bani starts here if we aren't currently tracking it
const startBanis = banis.filter( baniId => (
!currentBanis.has( baniId ) && currentBanis.add( baniId )
) )

// The bani ends here if it doesn't appear in the next line; stop tracking it
const endBanis = banis.filter( baniId => (
currentBanis.has( baniId )
&& !nextBaniLine.includes( baniId )
&& currentBanis.delete( baniId )
) )

return {
...line,
start_banis: startBanis,
end_banis: endBanis,
}
} )
// Bucket the lines into angs
.reduce( ( angs, line ) => ( {
...angs,
[ line.ang ]: [ ...( angs[ line.ang ] || [] ),
{ // Remove id and ang from object
{ // Remove ang from object
...line,
ang: undefined,
id: undefined,
// Remove startBanis and endBanis if they're empty
start_banis: line.start_banis.length ? line.start_banis : undefined,
end_banis: line.end_banis.length ? line.end_banis : undefined,
} ],
} ), {} )

3 changes: 2 additions & 1 deletion migrations/schema.js
@@ -59,7 +59,8 @@ exports.up = knex => Promise.all( [
knex.schema.createTable( 'bani_lines', table => {
table.integer( 'line_id' ).references( 'id' ).inTable( 'lines' )
table.integer( 'bani_id' ).references( 'id' ).inTable( 'banis' )
table.primary( [ 'line_id', 'bani_id' ] )
table.integer( 'line_group' ).notNullable()
table.primary( [ 'line_id', 'bani_id', 'line_group' ] )
} ),
] )

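For context: the new line_group column, together with the widened primary key, is what lets a single bani contain several (possibly overlapping) runs of lines, since the same line_id can now recur under different line_group values for one bani_id. A minimal sketch of reading one group back out, assuming a configured knex instance; getGroupLines and the id values are illustrative, not part of this commit:

// Fetch the ordered line_ids of one group within a bani (illustrative helper).
const getGroupLines = ( knex, baniId, groupId ) => knex( 'bani_lines' )
  .select( 'line_id' )
  .where( { bani_id: baniId, line_group: groupId } )
  .orderBy( 'line_id' )

// e.g. getGroupLines( knex, 1, 2 ).then( console.log )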
Binary file modified schema.png
4 changes: 0 additions & 4 deletions seeds/banis.json

This file was deleted.

14 changes: 14 additions & 0 deletions seeds/banis/Another Example Bani.json
@@ -0,0 +1,14 @@
[
{
"start_line": 1,
"end_line": 9
},
{
"start_line": 6,
"end_line": 13
},
{
"start_line": 10,
"end_line": 11
}
]
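Note the deliberate overlap in this fixture: lines 6 to 9 fall inside both the first and second ranges. A minimal sketch of how such ranges expand into bani_lines rows, mirroring the insertBanis logic in seeds/seed.js below; the bani_id of 2 is hypothetical (ids are assigned at insertion time), and the require path assumes the repo root as the working directory:

const ranges = require( './seeds/banis/Another Example Bani.json' )

// Expand each { start_line, end_line } range into rows,
// numbering line_groups from 1 in file order.
const rows = ranges.reduce( ( acc, { start_line: start, end_line: end }, index ) => [
  ...acc,
  ...Array.from( new Array( ( end - start ) + 1 ), ( _, i ) => ( {
    bani_id: 2, // hypothetical id
    line_id: start + i,
    line_group: index + 1,
  } ) ),
], [] )

console.log( rows.length ) // 9 + 8 + 2 = 19 rows; lines 6-9 appear in two groups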
10 changes: 10 additions & 0 deletions seeds/banis/Japji Sahib.json
@@ -0,0 +1,10 @@
[
{
"start_line": 1,
"end_line": 3
},
{
"start_line": 4,
"end_line": 5
}
]
94 changes: 57 additions & 37 deletions seeds/seed.js
@@ -7,44 +7,77 @@ const { readdirSync } = require( 'fs' )

const { stripExtension, generateFirstLetters } = require( '../lib/utils' )

const banis = require( './banis' )
const raags = require( './raags' )
const sources = require( './sources' )
const writers = require( './writers' )
const lineTypes = require( './line_types' )

const SHABADS_DIR = 'shabads'
const SOURCES_DIR = 'sources'

const BANIS_DIR = 'banis'

// Map each of sources to a table name
const tables = {
raags,
sources,
writers,
banis,
line_types: lineTypes,
}


exports.seed = knex => knex.transaction( async trx => {
// Transform each of the files and then insert into DB
await Promise.all( Object.entries( tables ).map( ( [ tableName, data ] ) =>
// Transform each array item into { id, name } objects
knex
// Transform each of the independent json files and then insert into DB
const insertTables = ( knex, trx ) => Promise.all( (
Object.entries( tables )
.map( ( [ tableName, data ] ) => knex
.batchInsert( tableName, data.map( ( name, index ) => ( { id: index + 1, name } ) ) )
.transacting( trx ) ) )
.transacting( trx ) )
) )

// Insert all the shabads
// Inserts all the banis into the banis and bani_lines tables
const insertBanis = async ( knex, trx ) => {
// Get the bani names
const banis = readdirSync( `seeds/${BANIS_DIR}` ).map( stripExtension )

// Insert all the banis
for ( const name of banis ) {
const [ baniId ] = await knex( 'banis' )
.insert( { name } )
.returning( '*' )
.transacting( trx )

// eslint-disable-next-line
const baniLines = require( `./${BANIS_DIR}/${name}` )
// Generate line_ids between start_line and end_line
.map( ( { start_line: startLine, end_line: endLine } ) => Array.from(
new Array( ( endLine - startLine ) + 1 ),
( val, index ) => index + startLine,
) )
// Tag each line with its line_group, and flatten
.reduce( ( lines, groups, index ) => [
...lines,
...groups.map( lineId => ( {
bani_id: baniId,
line_id: lineId,
line_group: index + 1,
} ) ),
], [] )

// Insert into the database
await Promise.all( baniLines.map( baniLine => (
knex( 'bani_lines' ).insert( baniLine ).transacting( trx )
) ) )
}
}

// Retrieves, flattens, and inserts the shabads from the various folders
const insertShabads = async ( knex, trx ) => {
const shabads = readdirSync( `seeds/${SHABADS_DIR}` )
.sort( ( s1, s2 ) => sources.indexOf( s1 ) - sources.indexOf( s2 ) )
.reduce( ( data, source ) => [
...data,
...readdirSync( `seeds/${SHABADS_DIR}/${source}` )
.map( stripExtension )
.map( writer => [ writer, `./${SHABADS_DIR}/${source}/${writer}` ] )
// eslint-disable-next-line
.map( ( [ writer, path ] ) => [ writer, require( path ) ] )
.map( writer => [ writer, require( `./${SHABADS_DIR}/${source}/${writer}` ) ] )
.map( ( [ writer, shabads ] ) => shabads.map( shabad => ( {
...shabad,
source_id: sources.indexOf( source ) + 1,
@@ -53,10 +86,14 @@
], [] )
.reduce( ( allData, data ) => [ ...allData, ...data ], [] )

// Insert all the shabads
await Promise.all( shabads.map( shabad => (
knex( 'shabads' ).insert( shabad ).transacting( trx )
) ) )
}

// Retrieves, flattens, and inserts all the lines from the nested folders
const insertLines = async ( knex, trx ) => {
// Insert all the lines
const lines = readdirSync( `seeds/${SOURCES_DIR}` )
.sort( ( s1, s2 ) => sources.indexOf( s1 ) - sources.indexOf( s2 ) )
@@ -69,9 +106,8 @@
...readdirSync( `seeds/${SOURCES_DIR}/${source}/${batch}` )
.map( stripExtension )
.sort( ( a1, a2 ) => a1 - a2 )
.map( ang => [ ang, `./${SOURCES_DIR}/${source}/${batch}/${ang}` ] )
// eslint-disable-next-line
.map( ( [ ang, path ] ) => [ ang, require( path ) ] )
.map( ang => [ ang, require( `./${SOURCES_DIR}/${source}/${batch}/${ang}` ) ] )
.map( ( [ ang, lines ] ) => lines.map( line => ( {
...line,
ang,
@@ -81,31 +117,15 @@
], [] )
.reduce( ( allData, data ) => [ ...allData, ...data ], [] )

// Pluck banis from lines and generate BaniLines from ranges
const { baniLines } = lines.reduce( (
{ currentBanis, baniLines },
{ start_banis: startBanis = [], end_banis: endBanis = [] },
index,
) => {
startBanis.forEach( id => currentBanis.add( id ) )
// Assuming that the lineId in SQLite === index+1 in array here
const lines = [
...baniLines,
...[ ...currentBanis ].map( id => ( { bani_id: id, line_id: index + 1 } ) ),
]
endBanis.forEach( id => currentBanis.delete( id ) )
return { currentBanis, baniLines: lines }
}, { currentBanis: new Set(), baniLines: [] } )

// Use for-of instead to insert lines sequentially and preserve order
for ( const line of lines ) {
delete line.start_banis
delete line.end_banis
await knex( 'lines' ).insert( line ).transacting( trx )
}
}

// Insert the bani lines
await Promise.all( baniLines.map( baniLine => (
knex( 'bani_lines' ).insert( baniLine ).transacting( trx )
) ) )
exports.seed = knex => knex.transaction( async trx => {
await insertTables( knex, trx )
await insertBanis( knex, trx )
await insertShabads( knex, trx )
await insertLines( knex, trx )
} )
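The entry point now simply chains the four helpers, tables, banis, shabads, then lines, inside a single transaction. A minimal sketch of a full rebuild via knex's programmatic API, assuming a standard knexfile.js at the project root:

const knex = require( 'knex' )( require( './knexfile' ) )

// Recreate the schema, then run the exports.seed transaction above.
knex.migrate.latest()
  .then( () => knex.seed.run() )
  .then( () => knex.destroy() )
  .catch( err => {
    console.error( err )
    return knex.destroy()
  } )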
4 changes: 4 additions & 0 deletions seeds/sources/A/1/68.json
@@ -1,5 +1,6 @@
[
{
"id": 94019,
"shabad_id": 40007,
"pada": null,
"source_line": 0,
@@ -11,6 +12,7 @@
"type_id": null
},
{
"id": 94020,
"shabad_id": 40007,
"pada": null,
"source_line": 0,
@@ -22,6 +24,7 @@
"type_id": null
},
{
"id": 94021,
"shabad_id": 40007,
"pada": null,
"source_line": 0,
@@ -33,6 +36,7 @@
"type_id": null
},
{
"id": 94022,
"shabad_id": 40007,
"pada": null,
"source_line": 0,
9 changes: 9 additions & 0 deletions seeds/sources/A/1/90.json
@@ -1,5 +1,6 @@
[
{
"id": 94023,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -11,6 +12,7 @@
"type_id": null
},
{
"id": 94024,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -22,6 +24,7 @@
"type_id": null
},
{
"id": 94025,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -33,6 +36,7 @@
"type_id": null
},
{
"id": 94026,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -44,6 +48,7 @@
"type_id": null
},
{
"id": 94027,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -55,6 +60,7 @@
"type_id": null
},
{
"id": 94028,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -66,6 +72,7 @@
"type_id": null
},
{
"id": 94029,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -77,6 +84,7 @@
"type_id": null
},
{
"id": 94030,
"shabad_id": 40008,
"pada": null,
"source_line": 0,
@@ -88,6 +96,7 @@
"type_id": null
},
{
"id": 94031,
"shabad_id": 40008,
"pada": null,
"source_line": 0,