Talk:State/Alaska/2010

From AcaDec Scores and Information Center
Jump to navigation · Jump to search

Parsing PDFs

I created a lexer/parser for extracting all the data from the official scoresheets. They're in PDF format, which makes the data a bit hard to get at, but if you select all the text in the PDF and copy it into a text file, you'll get something like this:

Scores.txt

ID	First Name Last Name
1602	Tyler
Toms
1801	Stephen
Chen
1401	Ava
Goepfert
1906	Garret
McKinney
1404	Skyler
Evans
1204	Brooks
Willburn
1609	Damian
Cox
2209	Amelia
Cooper-Davis
1207	Andy
Edsall
Anchorage Hilton
Top Scores for Event #8 - Speech
02/26/10
School Name	Score County/Region/State Place
- 1 -
Division
1 - Honors
1 - Honors
1 - Honors
2 - Scholastic
2 - Scholastic
2 - Scholastic
3	-	V arsity
3	-	V arsity
3	-	V arsity
Lathrop HS
West Valley HS
Hutchison HS
IDEA Team I
Hutchison HS
Craig HS
Lathrop HS
Ketchikan HS
Craig HS
856.7
1
843.3
2
830.0
3
820.0
1
776.7
2
740.0
3
916.7
1
800.0
2
760.0
3

So here's a Lexer and Parser to make these into a nice wikitable. They can be compiled with lex/flex and yacc/bison, which are both free open-source software. I hereby license these source files under the open-source BSD license.

acadeca_lexer.l

%option noyywrap
%option nodefault

%{
/*
 * acadeca_lexer.l -- tokenizer for text copied out of a score-sheet PDF.
 *
 * Every semantic value is a char*.  SCORE, NUMBER and WORD set yylval to a
 * strdup()'d copy of the matched text (this file never frees it); the other
 * tokens do not set yylval.
 */
#include <string.h>
#define YYSTYPE char*
#include "acadeca_parser.tab.h"
%}

/* One case-insensitive character class per letter, used to spell keywords. */
A [aA]
B [bB]
C [cC]
D [dD]
E [eE]
F [fF]
G [gG]
H [hH]
I [iI]
J [jJ]
K [kK]
L [lL]
M [mM]
N [nN]
O [oO]
P [pP]
Q [qQ]
R [rR]
S [sS]
T [tT]
U [uU]
V [vV]
W [wW]
X [xX]
Y [yY]
Z [zZ]

%%

	/* "event" in any letter case.  Listed before WORD so that it wins when
	   both rules match the same length; a longer word such as "Events"
	   is still a WORD because the longest match is preferred. */
{E}{V}{E}{N}{T}		return EVENT;
\n					return LINE_END;
	/* Decimal score such as 856.7; any number of fractional digits. */
[0-9]+"."[0-9]+		{
						yylval=strdup(yytext);
						return SCORE;
					}
[0-9]+				{
						yylval=strdup(yytext);
						return NUMBER;
					}
	/* A word, optionally hyphenated ("Cooper-Davis").  The action has to
	   start on the same line as the pattern, otherwise flex gives the
	   pattern an empty action and the token is silently discarded. */
[a-zA-Z]+("-"[a-zA-Z]+)*	{
						yylval=strdup(yytext);
						return WORD;
					}
"/"					return SLASH;
"-"					return DASH;
	/* Blanks carry no meaning; \r is skipped too so CRLF files lex the
	   same way as LF files. */
[ \t\r]+			{ /* ignore */ }
.					return UNKNOWN;

%%

acadeca_parser.y

%{

/* asprintf() is a GNU/BSD extension: glibc only declares it when
   _GNU_SOURCE is defined before the first system header is included. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Title of the event most recently parsed; set by the `sheet` rule. */
char * eventTitle = NULL;
/* MediaWiki table row separator. */
const char * newRow = "|-----\n";
/* Display names indexed by medal.rank and medal.division respectively. */
char * rankNames[3] = {"Gold","Silver","Bronze"};
char * divisionNames[3] = {"Honors","Scholastic","Varsity"};

/* One row of the output table. */
typedef struct {
	char * firstName;
	char * lastName;
	int rank;		/* index into rankNames */
	char * score;	/* score text exactly as it appeared in the input */
	char * school;
	int division;	/* index into divisionNames */
} medal;

/* Capacity of the medals table. */
enum { MAX_MEDALS = 100 };

/* The names, schools and scores arrive as three separate lists, so each list
   has its own cursor into medals[]: num_medals counts the names read,
   school_on the schools read and score_on the scores read. */
int num_medals = 0;
int school_on = 0;
int score_on = 0;
medal medals[MAX_MEDALS];
void makeTable(char* event, medal* data,int count);

/* Provided by the flex scanner / defined in the epilogue; declared here so
   the generated parser and main() do not rely on implicit declarations. */
int yylex(void);
int yyerror(char *s);
void yyrestart(FILE *input_file);

/* Every grammar symbol carries a C string. */
#define YYSTYPE char*

%}

%token EVENT LINE_END SCORE NUMBER WORD SLASH DASH UNKNOWN

%%

/* A whole score sheet: the list of names, the event header, the school
   list and finally the scores.  The sheet's value (and eventTitle) is the
   event title produced by `event`. */
sheet : names event schools scores	{$$=eventTitle=$2;}
;

/* Any run of WORD and LINE_END tokens, possibly empty; the value is unused. */
wordList :
 | WORD wordList
 | LINE_END wordList
 ;

/* The words of one line up to and including its LINE_END.  Each word is
   followed by a single space, so the result ends with a trailing space. */
wordLine : WORD wordLine  {
		/* On failure asprintf leaves $$ indeterminate, so stop here. */
		if (asprintf(&($$),"%s %s",$1,$2) < 0) {
			yyerror("out of memory");
			YYABORT;
		}
	}
 | LINE_END				  { $$ = ""; }
 ;

/* Like wordLine, but the first word is glued to the rest without a space.
   NOTE(review): this recurses into wordLine rather than concatLine, so only
   the first gap is removed -- confirm that this is intended. */
concatLine : WORD wordLine  {
		if (asprintf(&($$),"%s%s",$1,$2) < 0) {
			yyerror("out of memory");
			YYABORT;
		}
	}
 | LINE_END				  { $$ = ""; }
 ;

/* One input line of arbitrary tokens, ending at the first LINE_END. */
line : EVENT line
 | SCORE line
 | NUMBER line
 | WORD line
 | SLASH line
 | DASH line
 | UNKNOWN line
 | LINE_END
 ;

/* NAMES: leading words and blank lines (the column header), then one entry
   per competitor. */
names : wordList nameList;

nameList : name nameList
 | name
 ;

/* One competitor: an ID number followed by the first-name line and the
   last-name line.  The ID itself is not stored. */
name : NUMBER concatLine concatLine {
		/* Refuse to write past the end of the fixed-size medals table. */
		if (num_medals >= (int)(sizeof medals / sizeof medals[0])) {
			yyerror("too many names for the medals table");
			YYABORT;
		}
		medals[num_medals].firstName=$2;
		medals[num_medals].lastName=$3;
		++num_medals;
	}
 ;

/* SCHOOLS: the header lines and division list are skipped, then each
   following line of words is one school name. */
schools : notSchool schoolList;
schoolList : school schoolList
 | school ;
school : wordLine  {
	/* Refuse to write past the end of the fixed-size medals table. */
	if (school_on >= (int)(sizeof medals / sizeof medals[0])) {
		yyerror("too many schools for the medals table");
		YYABORT;
	}
	medals[school_on++].school = $1;
}
 ;


/* Four arbitrary lines followed by the "<number> - <words>" division lines.
   The division text is discarded here; the `score` rule derives each row's
   division from the order of the places. */
notSchool : line line line line divList;
divList : NUMBER DASH wordLine divList
 | NUMBER DASH wordLine;


// EVENT 
/* Skip everything up to the first EVENT token, then skip every following
   token that is not a WORD; the remaining words of that line (a wordLine)
   become the value of `event`. */
event : notEvent EVENT notWord wordLine {$$ = $4;}
;

/* Possibly empty run of any tokens except WORD. */
notWord : EVENT notWord
 | LINE_END notWord
 | SCORE notWord
 | NUMBER notWord
 | SLASH notWord
 | DASH notWord
 | UNKNOWN notWord
 |
 ;

/* Possibly empty run of any tokens except EVENT. */
notEvent : LINE_END notEvent
 | SCORE notEvent
 | NUMBER notEvent
 | WORD notEvent
 | SLASH notEvent
 | DASH notEvent
 | UNKNOWN notEvent
 | 
 ;

/* SCORES: "<score> newline <place>" pairs separated by newlines.  The last
   pair may be followed by any number of newlines, so a file that ends with
   a line terminator (as text files normally do) still parses. */

scores : score trailingNewlines
 | score LINE_END scores
 ;

trailingNewlines :
 | LINE_END trailingNewlines
 ;

/* One score and its place.  Places restart at 1 for every division, so a
   place lower than the previous one marks the start of the next division. */
score : SCORE LINE_END NUMBER {
	int rank = atoi($3) - 1;	/* places are 1-based, rankNames is 0-based */
	int division;

	/* Refuse to write past the end of the fixed-size medals table. */
	if (score_on >= (int)(sizeof medals / sizeof medals[0])) {
		yyerror("too many scores for the medals table");
		YYABORT;
	}
	/* rank is later used as an index into rankNames. */
	if (rank < 0 || rank >= (int)(sizeof rankNames / sizeof rankNames[0])) {
		yyerror("place is outside the range of known medals");
		YYABORT;
	}

	if (score_on == 0)
		division = 0;
	else if (medals[score_on - 1].rank > rank)
		division = medals[score_on - 1].division + 1;
	else
		division = medals[score_on - 1].division;

	/* division is later used as an index into divisionNames. */
	if (division >= (int)(sizeof divisionNames / sizeof divisionNames[0])) {
		yyerror("more divisions than known division names");
		YYABORT;
	}

	medals[score_on].score = $1;
	medals[score_on].rank = rank;
	medals[score_on].division = division;
	score_on++;
}
 ;

%%

/*
 * Error reporting hook (bison's generated yyparse() reports problems through
 * a function with this name).  Writes "error: <s>" to stderr.
 *
 * Always returns 0; the original fell off the end of a non-void function.
 */
int yyerror(char *s) {
	fprintf(stderr, "error: %s\n", s);
	return 0;
}

/* Return s, or "" when s is NULL (passing NULL to printf's %s is undefined). */
static const char *textOrEmpty(const char *s) {
	return s ? s : "";
}

/* Return names[idx], or "?" when idx is outside [0, n). */
static const char *nameAt(char *const *names, int n, int idx) {
	return (idx >= 0 && idx < n) ? names[idx] : "?";
}

/*
 * Print one event's medal table as MediaWiki markup on stdout.
 *
 * eventName  heading text; NULL prints an empty heading
 * data       rows to print, already in output order
 * count      number of rows in data
 *
 * A header row naming the division is emitted before the first row and
 * whenever the division differs from the previous row's.
 */
void makeTable(char * eventName, medal* data,int count) {
	const int nRanks = (int)(sizeof rankNames / sizeof rankNames[0]);
	const int nDivisions = (int)(sizeof divisionNames / sizeof divisionNames[0]);

	printf("===%s===\n", textOrEmpty(eventName));
	printf("{| class = \"wikitable\" width=\"550\"\n");
	/* Iterate over the rows that were passed in, not the global table. */
	for(int i = 0; i < count; i++) {
		const medal *row = &data[i];
		const char *rank = nameAt(rankNames, nRanks, row->rank);

		if(i==0 || row->division != data[i-1].division) {
			/* newRow is data, not a format string. */
			fputs(newRow, stdout);
			printf("! %s !! Name !! School !! Score\n",
				nameAt(divisionNames, nDivisions, row->division));
		}
		fputs(newRow, stdout);
		printf("| {{%s|%s}} || %s %s || %s || %s \n",
			rank,
			rank,
			textOrEmpty(row->firstName),
			textOrEmpty(row->lastName),
			textOrEmpty(row->school),
			textOrEmpty(row->score));
	}
	printf("|}\n");
}

/*
 * Convert score sheets to wiki tables.
 *
 * With no arguments a single sheet is read from the scanner's default input
 * (flex scanners read stdin unless redirected); otherwise every argument is
 * a file holding one sheet.  A table is printed for each sheet that parses.
 *
 * Returns 0 when every sheet was converted, 1 when a file could not be
 * opened (processing stops there) or when any sheet failed to parse.
 */
int main(int argc, char **argv) {
	int status = 0;

	if(argc < 2) {
		/* The original parsed the input here but never printed the table. */
		if(yyparse() != 0)
			return 1;
		makeTable(eventTitle,medals,num_medals);
		return 0;
	}

	for(int i = 1; i < argc; i++) {
		FILE *f = fopen(argv[i], "r");
		if(!f) {
			perror(argv[i]);
			return 1;
		}

		/* Start every file from a clean slate so that nothing parsed from
		   an earlier file can show up in this file's table. */
		num_medals=school_on=score_on=0;
		eventTitle = NULL;
		memset(medals, 0, sizeof medals);

		yyrestart(f);
		if(yyparse() == 0) {
			makeTable(eventTitle,medals,num_medals);
		} else {
			/* Do not print a table built from a partial parse. */
			fprintf(stderr, "%s: could not parse score sheet\n", argv[i]);
			status = 1;
		}
		fclose(f);
	}
	return status;
}

Rovolo 15:25, 17 October 2010 (PDT)Rovolo

  • A+ for effort. gil 03:24, 18 October 2010 (PDT)
  • Several other state organizations use exactly the same score format. This will be beyond useful. Hats off to you. Madcap 10:36, 18 October 2010 (PDT)Madcap
  • This is over my head and I'm going to need to figure it out, but if this makes transferring scores over easier, I'm all for it.