Talk:State/Alaska/2010
From AcaDec Scores and Information Center
Jump to navigationJump to search
Parsing PDFs[edit]
I created a lexer/parser for extracting all the data from the official score sheets. They're in PDF format, which makes the data a bit hard to extract, but if you select all the text in the PDF and copy it, you'll get something like this:
Scores.txt[edit]
ID First Name Last Name 1602 Tyler Toms 1801 Stephen Chen 1401 Ava Goepfert 1906 Garret McKinney 1404 Skyler Evans 1204 Brooks Willburn 1609 Damian Cox 2209 Amelia Cooper-Davis 1207 Andy Edsall Anchorage Hilton Top Scores for Event #8 - Speech 02/26/10 School Name Score County/Region/State Place - 1 - Division 1 - Honors 1 - Honors 1 - Honors 2 - Scholastic 2 - Scholastic 2 - Scholastic 3 - V arsity 3 - V arsity 3 - V arsity Lathrop HS West Valley HS Hutchison HS IDEA Team I Hutchison HS Craig HS Lathrop HS Ketchikan HS Craig HS 856.7 1 843.3 2 830.0 3 820.0 1 776.7 2 740.0 3 916.7 1 800.0 2 760.0 3
So here's a Lexer and Parser to make these into a nice wikitable. They can be compiled with lex/flex and yacc/bison, which are both free open-source software. I hereby license these source files under the open-source BSD license.
acadeca_lexer.l[edit]
%option noyywrap
%option nodefault
%{
/*
 * acadeca_lexer.l -- tokenizer for the text copied out of a score-sheet PDF.
 *
 * SCORE, NUMBER and WORD tokens hand the parser a strdup()'d copy of the
 * matched text through yylval, because yytext is overwritten by the next
 * match.  The remaining tokens carry no value.
 *
 * The single-letter definitions below each match one letter in either case;
 * they are used to spell case-insensitive keywords such as EVENT.
 */
#include <string.h>
#define YYSTYPE char*
#include "acadeca_parser.tab.h"
%}
A [aA]
B [bB]
C [cC]
D [dD]
E [eE]
F [fF]
G [gG]
H [hH]
I [iI]
J [jJ]
K [kK]
L [lL]
M [mM]
N [nN]
O [oO]
P [pP]
Q [qQ]
R [rR]
S [sS]
T [tT]
U [uU]
V [vV]
W [wW]
X [xX]
Y [yY]
Z [zZ]
%%
{E}{V}{E}{N}{T} return EVENT;
\n return LINE_END;
[0-9]+"."[0-9]+ {
    /* Decimal number, e.g. "856.7"; any number of fractional digits. */
    yylval=strdup(yytext);
    return SCORE;
}
[0-9]+ {
    /* Plain integer: contestant ID, place, date component, ... */
    yylval=strdup(yytext);
    return NUMBER;
}
[a-zA-Z]+("-"[a-zA-Z]+)* {
    /* A word, optionally hyphenated ("Cooper-Davis").  Each hyphen must be
       followed by one or more letters so the whole name is a single token. */
    yylval=strdup(yytext);
    return WORD;
}
"/" return SLASH;
"-" return DASH;
[ \t\r] { /* ignore blanks and the CR of CRLF line endings */ }
. return UNKNOWN;
%%
acadeca_parser.y[edit]
%{
/*
 * acadeca_parser.y -- prologue: shared declarations for the grammar actions
 * and for the C code that follows the rules section.
 */

/* asprintf() is a GNU/BSD extension; glibc only declares it in <stdio.h>
   when _GNU_SOURCE is defined before the first system header is included. */
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Title of the event on the sheet; assigned by the `sheet` rule. */
char * eventTitle = NULL;

/* Wikitable row separator. */
const char * newRow = "|-----\n";

/* Display names indexed by medal.rank and medal.division respectively. */
const char * rankNames[3] = {"Gold","Silver","Bronze"};
const char * divisionNames[3] = {"Honors","Scholastic","Varsity"};

/* One row of the output table. */
typedef struct {
    char * firstName;
    char * lastName;
    int rank;        /* place on the sheet minus one (index into rankNames) */
    char * score;    /* score text exactly as it appeared in the input */
    char * school;
    int division;    /* index into divisionNames */
} medal;

/*
 * The sheet lists names, schools and scores as three separate runs, so each
 * run keeps its own cursor into medals[].
 */
int num_medals = 0;  /* entries filled in by the `name` rule */
int school_on = 0;   /* next entry to receive a school */
int score_on = 0;    /* next entry to receive a score/rank/division */
medal medals[100];

/* Provided by the flex-generated scanner. */
int yylex(void);
void yyrestart(FILE *input_file);

/* Defined after the rules section. */
int yyerror(char *s);
void makeTable(char* event, medal* data,int count);

#define YYSTYPE char*
%}
%token EVENT LINE_END SCORE NUMBER WORD SLASH DASH UNKNOWN
%%
/* A whole sheet: the name block, the event header, the school block and the
   score block, in that order.  The event title is kept in eventTitle. */
sheet : names event schools scores {$$=eventTitle=$2;}
;

/* Any run of words and line ends; its text is discarded. */
wordList :
| WORD wordList
| LINE_END wordList
;

/* Words up to the end of the line, joined with single spaces.  Each word is
   followed by a space, so a non-empty result ends in a trailing space. */
wordLine : WORD wordLine { asprintf(&($$),"%s %s",$1,$2); }
| LINE_END { $$ = ""; }
;

/* Like wordLine, but the first word is glued to the rest without a space. */
concatLine : WORD wordLine { asprintf(&($$),"%s%s",$1,$2); }
| LINE_END { $$ = ""; }
;

/* One line of arbitrary tokens; its content is ignored. */
line : EVENT line
| SCORE line
| NUMBER line
| WORD line
| SLASH line
| DASH line
| UNKNOWN line
| LINE_END
;

/* NAMES */
names : wordList nameList;
nameList : name nameList
| name
;
/* One contestant: ID number, then the first-name line and last-name line. */
name : NUMBER concatLine concatLine {
    /* medals[] has a fixed size; stop parsing instead of writing past it. */
    if (num_medals >= (int)(sizeof medals / sizeof medals[0])) {
        yyerror("too many names for the medals table");
        YYABORT;
    }
    medals[num_medals].firstName=$2;
    medals[num_medals].lastName=$3;
    ++num_medals;
}
;

/* SCHOOLS */
schools : notSchool schoolList;
schoolList : school schoolList
| school ;
/* One school line; schools are assigned to medals in order of appearance. */
school : wordLine {
    if (school_on >= (int)(sizeof medals / sizeof medals[0])) {
        yyerror("too many schools for the medals table");
        YYABORT;
    }
    medals[school_on++].school = $1;
}
;
/* Four ignored lines followed by the list of "N - Division" labels. */
notSchool : line line line line divList;
divList : NUMBER DASH wordLine divList
| NUMBER DASH wordLine;

/* EVENT */
/* Skip to the EVENT keyword, skip the non-word tokens after it, and take the
   remaining words on that line as the event title. */
event : notEvent EVENT notWord wordLine {$$ = $4;}
;
/* Any run of tokens other than WORD. */
notWord : EVENT notWord
| LINE_END notWord
| SCORE notWord
| NUMBER notWord
| SLASH notWord
| DASH notWord
| UNKNOWN notWord
|
;
/* Any run of tokens other than EVENT. */
notEvent : LINE_END notEvent
| SCORE notEvent
| NUMBER notEvent
| WORD notEvent
| SLASH notEvent
| DASH notEvent
| UNKNOWN notEvent
|
;

/* SCORES */
scores : score LINE_END scores
| score
;
/* One score entry: the score, a line end, then the place number. */
score : SCORE LINE_END NUMBER {
    if (score_on >= (int)(sizeof medals / sizeof medals[0])) {
        yyerror("too many scores for the medals table");
        YYABORT;
    }
    medals[score_on].score = $1;
    medals[score_on].rank = atoi($3) - 1;
    /* Places restart within each division, so a place lower than the
       previous entry's place marks the start of the next division. */
    if(score_on==0) medals[score_on].division = 0;
    else if(medals[score_on - 1].rank > medals[score_on].rank)
        medals[score_on].division = medals[score_on - 1].division + 1;
    else
        medals[score_on].division = medals[score_on - 1].division;
    score_on++;
}
;
%%
/*
 * Error callback used by the generated parser.
 * Writes "error: <s>" to stderr and returns 0.
 */
int yyerror(char *s) {
    fprintf(stderr, "error: %s\n", s);
    return 0;
}
/* Returns s, or an empty string when s is NULL, so it is always safe for %s. */
static const char *textOrEmpty(const char *s) {
    return s ? s : "";
}

/*
 * Prints the medal list as a MediaWiki table on stdout.
 *
 * eventName  heading printed above the table (NULL prints an empty heading)
 * data       array of medal entries, already ordered by division
 * count      number of entries in data
 *
 * A header row is emitted whenever the division changes.  Ranks and divisions
 * outside the rankNames / divisionNames tables are printed numerically
 * (1-based) rather than indexing past the end of those tables.
 */
void makeTable(char * eventName, medal* data,int count) {
    const int rankCount = (int)(sizeof rankNames / sizeof rankNames[0]);
    const int divisionCount = (int)(sizeof divisionNames / sizeof divisionNames[0]);

    printf("===%s===\n", textOrEmpty(eventName));
    printf("{| class = \"wikitable\" width=\"550\"\n");
    for (int i = 0; i < count; i++) {
        const medal *m = &data[i];

        if (i == 0 || m->division != data[i-1].division) {
            /* newRow is fixed text, not a format string, so use fputs. */
            fputs(newRow, stdout);
            if (m->division >= 0 && m->division < divisionCount) {
                printf("! %s !! Name !! School !! Score\n",
                       divisionNames[m->division]);
            } else {
                printf("! Division %d !! Name !! School !! Score\n",
                       m->division + 1);
            }
        }

        fputs(newRow, stdout);
        if (m->rank >= 0 && m->rank < rankCount) {
            printf("| {{%s|%s}} || %s %s || %s || %s \n",
                   rankNames[m->rank],
                   rankNames[m->rank],
                   textOrEmpty(m->firstName),
                   textOrEmpty(m->lastName),
                   textOrEmpty(m->school),
                   textOrEmpty(m->score));
        } else {
            /* No medal template for this place: print the place number. */
            printf("| %d || %s %s || %s || %s \n",
                   m->rank + 1,
                   textOrEmpty(m->firstName),
                   textOrEmpty(m->lastName),
                   textOrEmpty(m->school),
                   textOrEmpty(m->score));
        }
    }
    printf("|}\n");
}
/*
 * Entry point.
 *
 * With no arguments, one score sheet is read from standard input.  Otherwise
 * each argument names a file holding one score sheet.  For every sheet that
 * parses, the corresponding wikitable is printed on stdout.
 *
 * Returns 0 when every sheet was converted, 1 when a file could not be opened
 * or a sheet failed to parse.
 */
int main(int argc, char **argv) {
    int status = 0;

    if (argc < 2) {
        /* No file arguments: the scanner reads stdin by default. */
        if (yyparse() != 0) {
            return 1;
        }
        makeTable(eventTitle, medals, num_medals);
        return 0;
    }

    for (int i = 1; i < argc; i++) {
        /* Start each sheet from a clean slate so nothing from the previous
           file leaks into this one's table. */
        num_medals = school_on = score_on = 0;
        eventTitle = NULL;
        memset(medals, 0, sizeof medals);

        FILE *f = fopen(argv[i], "r");
        if (!f) {
            perror(argv[i]);
            return 1;
        }
        yyrestart(f);
        int parseResult = yyparse();
        fclose(f);

        if (parseResult != 0) {
            /* yyerror has already reported the details; skip this sheet but
               keep converting the remaining files. */
            fprintf(stderr, "%s: could not be parsed, skipping\n", argv[i]);
            status = 1;
            continue;
        }
        makeTable(eventTitle, medals, num_medals);
    }
    return status;
}
Rovolo 15:25, 17 October 2010 (PDT)Rovolo
- A+ for effort. gil 03:24, 18 October 2010 (PDT)
- Several other state organizations use exactly the same score format. This will be beyond useful. Hats off to you. Madcap 10:36, 18 October 2010 (PDT)Madcap
- This is over my head, and I'm going to need to figure it out, but if this makes transferring scores over easier, I'm all for it.