#!/bin/sh

# Check if paragraph IDs are unique, print next available ID

# The first argument is the file to check. Anything that is not an existing
# regular file (including a missing argument) is rejected with status 1.
fn=$1
if test ! -f "$fn"
then
	# Diagnostics go to stderr so they are not mixed into the report output.
	echo file not found >&2
	exit 1
fi

# check_paragraph_ids FILE
#
# Scan FILE for paragraph IDs written as {<prefix>:<number>}, where <prefix>
# is lowercase letters optionally followed by digits. Written to stdout:
#   - an error for every ID whose prefix differs from the prefix of the
#     first ID found in the file,
#   - an error for every ID that was already seen earlier in the file,
#   - a final line with the next free ID: first prefix, largest number + 1.
# The exit status is that of awk.
check_paragraph_ids() {
	awk '
	{
		rest = $0

		# A line may hold several IDs: take the leftmost match, then keep
		# scanning the text that follows it.
		while (match(rest, "[{][a-z]*[0-9]*:[^}]*[}]")) {
			# Strip the surrounding braces.
			id = substr(rest, RSTART + 1, RLENGTH - 2)
			split(id, A, ":")

			# Track the largest number seen after the colon; int() gives 0
			# when that text does not start with digits.
			curr = int(A[2])
			if (curr > last)
				last = curr

			# The first non-empty prefix becomes the expected one.
			if (doc == "")
				doc = A[1]
			else if (doc != A[1])
				print "Error: invalid doc id:", id, "in line", NR, "(should be", doc, ")"

			# SEEN maps each ID to the line of its first occurrence.
			if (id in SEEN)
				print "Error: invalid doc id:", id, "in line", NR, "reused (first used in line", SEEN[id], ")"
			else
				SEEN[id] = NR

			# Continue right after the ID just handled.
			rest = substr(rest, RSTART + RLENGTH)
		}
	}

	END {
		print "Info: next available ID is: {" doc ":" (last + 1) "}"
	}
	' < "$1"
}

# Quoted so that file names with spaces or glob characters reach awk intact.
check_paragraph_ids "$fn"
