Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
Merge pull request #49 from longhotsummer/akn3
Browse files Browse the repository at this point in the history
Akn3
  • Loading branch information
longhotsummer authored Jun 12, 2020
2 parents 4f9469a + 2d24a10 commit 3fdb43f
Show file tree
Hide file tree
Showing 22 changed files with 7,860 additions and 1,079 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,17 @@ You can create your own grammar by creating a gem that provides these files and

## Changelog

### 10.0.0 (?)

* Support creating XML with AKN 3 namespace (http://docs.oasis-open.org/legaldocml/ns/akn/3.0)
* Add --namespace option to toggle between generating AKN2 and AKN3 documents
* Remove unnecessary schemaLocation header in root element
* BREAKING: default to AKN3 namespace
* BREAKING: replace id attributes with eId attributes
* BREAKING: serialize schedules as attachments to the act, not as components that are peers of the act
* BREAKING: anonymous blocks are serialized as hcontainers, not paragraphs
* BREAKING: crossheading hcontainer IDs correctly use hcontainer

### 9.2.0 (10 June 2020)

* Subpart numbers are optional
Expand Down
10 changes: 9 additions & 1 deletion bin/slaw
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class SlawCLI < Thor
option :section_number_position, enum: ['before-title', 'after-title', 'guess'], desc: "Where do section titles come in relation to the section number? Default: before-title"
option :grammar, type: :string, desc: "Grammar name (usually a two-letter country code). Default is za."
option :ascii, type: :boolean, default: false, desc: "Process text as ASCII using %-encoding. This can provide significant speed improvements if the grammar uses only ASCII literals. See https://github.com/cjheath/treetop/issues/31."
option :namespace, enum: ['akn2', 'akn3'], default: 'akn3', desc: 'AKN XML namespace to use.'
def parse(name)
logging

Expand All @@ -33,6 +34,13 @@ class SlawCLI < Thor
text = extractor.extract_from_file(name)
end

case options[:namespace]
when 'akn2'
Slaw.akn_namespace = Slaw::AKN2_NS
when 'akn3'
Slaw.akn_namespace = Slaw::AKN3_NS
end

generator = Slaw::ActGenerator.new(options[:grammar] || 'za')

if options[:fragment]
Expand All @@ -49,7 +57,7 @@ class SlawCLI < Thor

if options[:id_prefix]
prefix = options[:id_prefix]
prefix += "." unless prefix.end_with?('.')
prefix += "__" unless prefix.end_with?('__')
generator.builder.fragment_id_prefix = prefix
end
end
Expand Down
27 changes: 22 additions & 5 deletions lib/slaw/grammars/counters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,14 @@ class Counters
# Counters for generating element IDs. This is a hash from the element ID
# prefix, to another hash that maps the element type name to a count.
#
# For backwards compatibility, counters always start at -1, and must be
# incremented before being used. This ensures that element ids start at 0.
# This is NOT compatible with AKN 3.0 which requires that element numbers
# start at 1.
# Counters always start at 0, and must be incremented before being used.
# This ensures that element ids start at 1, as per AKN 3.0 spec.
#
# eg.
#
# section-1 => paragraph => 2
#
@@counters = Hash.new{ |h, k| h[k] = Hash.new(-1) }
@@counters = Hash.new{ |h, k| h[k] = Hash.new(0) }

def self.counters
@@counters
Expand All @@ -22,6 +20,25 @@ def self.counters
# Discard every accumulated count by clearing the shared @@counters hash.
def self.reset!
@@counters.clear
end

# Clean a <num> value for use in an eId
# See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
#
# The number part of the identifiers of such elements corresponds to the
# stripping of all final punctuation, meaningless separations as well as
# redundant characters in the content of the <num> element. The
# representation is case-sensitive
#
# (i) -> i
# 1.2. -> 1-2
# 3a bis -> 3abis
#
# All whitespace is removed (tabs, newlines and non-breaking spaces as well
# as plain spaces), because none of it may appear in an eId.
#
# @param num [String] the raw text of the <num> element
# @return [String] a new string; +num+ itself is not modified
def self.clean(num)
  num
    # drop brackets and any whitespace, including Unicode spaces such as NBSP
    .gsub(/[[:space:]()\[\]]/, '')
    # strip leading and trailing dots; \A/\z anchor to the whole string
    .gsub(/\A\.+|\.+\z/, '')
    # remaining runs of dots separate number components
    .gsub(/\.+/, '-')
end
end
end
end
25 changes: 11 additions & 14 deletions lib/slaw/grammars/schedules_nodes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ module Schedules

class ScheduleContainer < Treetop::Runtime::SyntaxNode
def to_xml(b, idprefix="")
b.components { |b|
b.attachments { |b|
schedules.children.elements.each_with_index { |e, i|
e.to_xml(b, idprefix, i+1)
}
Expand Down Expand Up @@ -86,6 +86,9 @@ def schedule_id(heading_text, i)
end

def to_xml(b, idprefix=nil, i=1)
# reset counters for this new schedule document
Slaw::Grammars::Counters.reset!

heading_text = self.schedule_title.heading_text
if not heading_text
heading_text = "Schedule"
Expand All @@ -95,27 +98,28 @@ def to_xml(b, idprefix=nil, i=1)
# the schedule id is derived from the heading
schedule_id = self.schedule_id(heading_text, i)

b.component(id: "component-#{schedule_id}") { |b|
b.doc_(name: schedule_id) { |b|
b.attachment(eId: "att_#{i}") { |b|
schedule_title.to_xml(b, '', heading_text)
b.doc_(name: "schedule") { |b|
b.meta { |b|
b.identification(source: "#slaw") { |b|
b.FRBRWork { |b|
b.FRBRthis(value: "#{WORK_URI}/#{schedule_id}")
b.FRBRthis(value: "#{WORK_URI}/!#{schedule_id}")
b.FRBRuri(value: WORK_URI)
b.FRBRalias(value: heading_text)
b.FRBRdate(date: '1980-01-01', name: 'Generation')
b.FRBRauthor(href: '#council')
b.FRBRcountry(value: 'za')
}
b.FRBRExpression { |b|
b.FRBRthis(value: "#{EXPRESSION_URI}/#{schedule_id}")
b.FRBRthis(value: "#{EXPRESSION_URI}/!#{schedule_id}")
b.FRBRuri(value: EXPRESSION_URI)
b.FRBRdate(date: '1980-01-01', name: 'Generation')
b.FRBRauthor(href: '#council')
b.FRBRlanguage(language: 'eng')
}
b.FRBRManifestation { |b|
b.FRBRthis(value: "#{MANIFESTATION_URI}/#{schedule_id}")
b.FRBRthis(value: "#{MANIFESTATION_URI}/!#{schedule_id}")
b.FRBRuri(value: MANIFESTATION_URI)
b.FRBRdate(date: Time.now.strftime('%Y-%m-%d'), name: 'Generation')
b.FRBRauthor(href: '#slaw')
Expand All @@ -124,14 +128,7 @@ def to_xml(b, idprefix=nil, i=1)
}

b.mainBody { |b|
idprefix = "#{schedule_id}."

# there is no good AKN hierarchy container for schedules, so we
# use hcontainer instead
b.hcontainer(id: schedule_id, name: "schedule") { |b|
schedule_title.to_xml(b, idprefix, heading_text)
body.children.elements.each_with_index { |e| e.to_xml(b, idprefix, i) } if body.is_a? Body
}
body.children.elements.each_with_index { |e| e.to_xml(b, '', i) } if body.is_a? Body
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion lib/slaw/grammars/tables_nodes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ module Grammars
module Tables
class Table < Treetop::Runtime::SyntaxNode
def to_xml(b, idprefix, i=0)
b.table(id: "#{idprefix}table#{i}") { |b|
cnt = Slaw::Grammars::Counters.counters[idprefix]['table'] += 1

b.table(eId: "#{idprefix}table_#{cnt}") { |b|
# we'll gather cells into this row list
rows = []
cells = []
Expand Down
66 changes: 34 additions & 32 deletions lib/slaw/grammars/za/act_nodes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ class Act < Treetop::Runtime::SyntaxNode
MANIFESTATION_URI = EXPRESSION_URI

def to_xml(b, idprefix=nil, i=0)
b.act(contains: "originalVersion") { |b|
b.act(contains: 'originalVersion', name: 'act') { |b|
write_meta(b)
write_preface(b)
write_preamble(b)
write_body(b)
write_schedules(b)
}
write_schedules(b)
end

def write_meta(b)
b.meta { |b|
write_identification(b)

b.references(source: "#this") {
b.TLCOrganization(id: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
b.TLCOrganization(id: 'council', href: '/ontology/organization/za/council', showAs: "Council")
b.TLCOrganization(eId: 'slaw', href: 'https://github.com/longhotsummer/slaw', showAs: "Slaw")
b.TLCOrganization(eId: 'council', href: '/ontology/organization/za/council', showAs: "Council")
}
}
end
Expand All @@ -38,7 +38,7 @@ def write_identification(b)
b.FRBRWork { |b|
b.FRBRthis(value: "#{WORK_URI}/main")
b.FRBRuri(value: WORK_URI)
b.FRBRalias(value: 'Short Title')
b.FRBRalias(value: 'Short Title', name: 'title')
b.FRBRdate(date: '1980-01-01', name: 'Generation')
b.FRBRauthor(href: '#council')
b.FRBRcountry(value: 'za')
Expand Down Expand Up @@ -125,7 +125,7 @@ def to_xml(b, *args)
if !stmts.empty?
b.preamble { |b|
stmts.each { |e|
e.preamble_statement.to_xml(b, "")
e.preamble_statement.to_xml(b, "preamble__")
}
}
end
Expand All @@ -138,11 +138,11 @@ def num
end

def to_xml(b, id_prefix='', *args)
id = id_prefix + "part-#{num}"
id = id_prefix + "part_#{Slaw::Grammars::Counters.clean(num)}"

b.part(id: id) { |b|
b.part(eId: id) { |b|
heading.to_xml(b)
children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
}
end
end
Expand Down Expand Up @@ -171,13 +171,15 @@ def to_xml(b, id_prefix='', *args)
num = self.num
if num.empty?
num = Slaw::Grammars::Counters.counters[id_prefix]['subpart'] += 1
else
num = Slaw::Grammars::Counters.clean(num)
end

id = id_prefix + "subpart-#{num}"
id = id_prefix + "subpart_#{num}"

b.subpart(id: id) { |b|
b.subpart(eId: id) { |b|
heading.to_xml(b)
children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
}
end
end
Expand All @@ -203,11 +205,11 @@ def num
end

def to_xml(b, id_prefix='', *args)
id = id_prefix + "chapter-#{num}"
id = id_prefix + "chp_#{Slaw::Grammars::Counters.clean(num)}"

b.chapter(id: id) { |b|
b.chapter(eId: id) { |b|
heading.to_xml(b)
children.elements.each_with_index { |e, i| e.to_xml(b, id + '.', i) }
children.elements.each_with_index { |e, i| e.to_xml(b, id + '__', i) }
}
end
end
Expand All @@ -233,11 +235,11 @@ def num
end

def to_xml(b, *args)
id = "section-#{num}"
b.section(id: id) { |b|
id = "sec_#{Slaw::Grammars::Counters.clean(num)}"
b.section(eId: id) { |b|
section_title.to_xml(b)

idprefix = "#{id}."
idprefix = "#{id}__"
children.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
}
end
Expand Down Expand Up @@ -308,11 +310,11 @@ def to_xml(b, idprefix='')

class BlockElements < Treetop::Runtime::SyntaxNode
def to_xml(b, idprefix='', i=0)
cnt = Slaw::Grammars::Counters.counters[idprefix]['paragraph'] += 1
id = "#{idprefix}paragraph#{cnt}"
idprefix = "#{id}."
cnt = Slaw::Grammars::Counters.counters[idprefix]['hcontainer'] += 1
id = "#{idprefix}hcontainer_#{cnt}"
idprefix = "#{id}__"

b.paragraph(id: id) { |b|
b.hcontainer(eId: id) { |b|
b.content { |b|
elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
}
Expand All @@ -326,10 +328,10 @@ def num
end

def to_xml(b, idprefix, i)
id = idprefix + num.gsub(/[()]/, '')
idprefix = id + "."
id = idprefix + "subsec_" + Slaw::Grammars::Counters.clean(num)
idprefix = id + "__"

b.subsection(id: id) { |b|
b.subsection(eId: id) { |b|
b.num(num)
block_elements_with_inline.to_xml(b, idprefix)
}
Expand All @@ -341,10 +343,10 @@ class Blocklist < Treetop::Runtime::SyntaxNode
# yield to it a builder to insert a listIntroduction node
def to_xml(b, idprefix, i=0, &block)
cnt = Slaw::Grammars::Counters.counters[idprefix]['list'] += 1
id = idprefix + "list#{cnt}"
idprefix = id + '.'
id = idprefix + "list_#{cnt}"
idprefix = id + '__'

b.blockList(id: id, renest: true) { |b|
b.blockList(eId: id, renest: true) { |b|
b.listIntroduction { |b| yield b } if block_given?

elements.each { |e| e.to_xml(b, idprefix) }
Expand All @@ -358,7 +360,7 @@ def num
end

def to_xml(b, idprefix)
b.item(id: idprefix + num.gsub(/[()]/, '')) { |b|
b.item(eId: idprefix + "item_" + Slaw::Grammars::Counters.clean(num)) { |b|
b.num(num)
b.p { |b|
item_content.inline_items.to_xml(b, idprefix) if respond_to? :item_content and item_content.respond_to? :inline_items
Expand All @@ -369,10 +371,10 @@ def to_xml(b, idprefix)

class Crossheading < Treetop::Runtime::SyntaxNode
def to_xml(b, idprefix, i=0)
cnt = Slaw::Grammars::Counters.counters[idprefix]['crossheading'] += 1
id = "#{idprefix}crossheading-#{cnt}"
cnt = Slaw::Grammars::Counters.counters[idprefix]['hcontainer'] += 1
id = "#{idprefix}hcontainer_#{cnt}"

b.hcontainer(id: id, name: 'crossheading') { |b|
b.hcontainer(eId: id, name: 'crossheading') { |b|
b.heading { |b|
inline_items.to_xml(b, idprefix)
}
Expand Down
Loading

0 comments on commit 3fdb43f

Please sign in to comment.