Talk:State/Alaska/2010
From AcaDec Scores and Information Center
Jump to navigation · Jump to search
Parsing PDFs
I created a lexer/parser for extracting all the data from the official scoresheets. They're in PDF format, which makes the data a bit hard to extract, but if you just select all of the text in the PDF, you'll get something like this:
Scores.txt
ID First Name Last Name 1602 Tyler Toms 1801 Stephen Chen 1401 Ava Goepfert 1906 Garret McKinney 1404 Skyler Evans 1204 Brooks Willburn 1609 Damian Cox 2209 Amelia Cooper-Davis 1207 Andy Edsall Anchorage Hilton Top Scores for Event #8 - Speech 02/26/10 School Name Score County/Region/State Place - 1 - Division 1 - Honors 1 - Honors 1 - Honors 2 - Scholastic 2 - Scholastic 2 - Scholastic 3 - V arsity 3 - V arsity 3 - V arsity Lathrop HS West Valley HS Hutchison HS IDEA Team I Hutchison HS Craig HS Lathrop HS Ketchikan HS Craig HS 856.7 1 843.3 2 830.0 3 820.0 1 776.7 2 740.0 3 916.7 1 800.0 2 760.0 3
So here's a Lexer and Parser to make these into a nice wikitable. They can be compiled with lex/flex and yacc/bison, which are both free open-source software. I hereby license these source files under the open-source BSD license.
acadeca_lexer.l
%option noyywrap
%option nodefault

%{
/*
 * acadeca_lexer.l -- tokenizer for text copied out of a score-sheet PDF.
 *
 * Tokens handed to the parser (declared in acadeca_parser.tab.h):
 *   EVENT     the word "event", in any letter case
 *   LINE_END  a newline
 *   SCORE     digits '.' digits      (text in yylval)
 *   NUMBER    a run of digits        (text in yylval)
 *   WORD      letters, optionally joined by single hyphens (text in yylval)
 *   SLASH     "/"
 *   DASH      "-"
 *   UNKNOWN   any other single character
 *
 * Every yylval is a fresh strdup() copy of the matched text; this file never
 * frees it.
 *
 * "Event" matches both the EVENT and WORD rules with the same length, so the
 * EVENT rule must stay listed first for the tie to resolve in its favour.
 */
#include <string.h>   /* strdup */

#define YYSTYPE char*
#include "acadeca_parser.tab.h"
%}

E [eE]
N [nN]
T [tT]
V [vV]

%%

{E}{V}{E}{N}{T}             return EVENT;
\n                          return LINE_END;
[0-9]+"."[0-9]+             { yylval = strdup(yytext); return SCORE; }
[0-9]+                      { yylval = strdup(yytext); return NUMBER; }
[a-zA-Z]+("-"[a-zA-Z]+)*    { /* whole hyphenated word, e.g. Cooper-Davis */
                              yylval = strdup(yytext); return WORD; }
"/"                         return SLASH;
"-"                         return DASH;
[ \t\r]+                    { /* skip blanks, and the CR of CRLF line ends */ }
.                           return UNKNOWN;

%%
acadeca_parser.y
%{
/*
 * acadeca_parser.y -- turns the token stream of one score sheet into a
 * MediaWiki table printed on stdout.
 *
 * The grammar reads four consecutive sections (see the `sheet` rule):
 * names, event title, schools, scores. Each section fills one field group of
 * the global `medals` array, using its own cursor (num_medals, school_on,
 * score_on), so entry i of every section ends up in medals[i].
 *
 * Usage: pass one or more file names, or no arguments to read standard input.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Capacity of the medals array; the grammar actions refuse to go past it. */
#define MAX_MEDALS 100

char * eventTitle = NULL;
const char * newRow = "|-----\n";
char * rankNames[3] = {"Gold","Silver","Bronze"};
char * divisionNames[3] = {"Honors","Scholastic","Varsity"};

typedef struct {
    char * firstName;
    char * lastName;
    int rank;        /* place on the sheet minus one; index into rankNames */
    char * score;    /* score text exactly as it appeared in the input */
    char * school;
    int division;    /* index into divisionNames */
} medal;

int num_medals = 0;  /* entries filled by the names section */
int school_on = 0;   /* next entry to receive a school */
int score_on = 0;    /* next entry to receive a score and rank */
medal medals[MAX_MEDALS];

/* Provided by the flex-generated scanner. */
int yylex(void);
void yyrestart(FILE *input);

int yyerror(const char *s);
void makeTable(char* event, medal* data,int count);
static char *joinStrings(const char *left, const char *sep, const char *right);

/* Abort the parse instead of writing past the end of medals[]. */
#define REQUIRE_SLOT(index)                                          \
    do {                                                             \
        if ((index) >= MAX_MEDALS) {                                 \
            yyerror("too many entries for the medals array");        \
            YYABORT;                                                 \
        }                                                            \
    } while (0)

#define YYSTYPE char*
%}

%token EVENT LINE_END SCORE NUMBER WORD SLASH DASH UNKNOWN

%%

sheet : names event schools scores { $$ = eventTitle = $2; }
      ;

wordList :
         | WORD wordList
         | LINE_END wordList
         ;

/* Words up to the end of the line, each followed by one space. */
wordLine : WORD wordLine { $$ = joinStrings($1, " ", $2); }
         | LINE_END      { $$ = ""; }
         ;

/* Like wordLine, but the first word is glued to the rest without a space. */
concatLine : WORD wordLine { $$ = joinStrings($1, "", $2); }
           | LINE_END      { $$ = ""; }
           ;

/* Any run of tokens up to and including the next LINE_END. */
line : EVENT line
     | SCORE line
     | NUMBER line
     | WORD line
     | SLASH line
     | DASH line
     | UNKNOWN line
     | LINE_END
     ;

/* NAMES */
names : wordList nameList
      ;

nameList : name nameList
         | name
         ;

name : NUMBER concatLine concatLine
       {
           REQUIRE_SLOT(num_medals);
           medals[num_medals].firstName = $2;
           medals[num_medals].lastName = $3;
           ++num_medals;
       }
     ;

/* SCHOOLS */
schools : notSchool schoolList
        ;

schoolList : school schoolList
           | school
           ;

school : wordLine
         {
             REQUIRE_SLOT(school_on);
             medals[school_on++].school = $1;
         }
       ;

notSchool : line line line line divList
          ;

divList : NUMBER DASH wordLine divList
        | NUMBER DASH wordLine
        ;

/* EVENT */
event : notEvent EVENT notWord wordLine { $$ = $4; }
      ;

notWord : EVENT notWord
        | LINE_END notWord
        | SCORE notWord
        | NUMBER notWord
        | SLASH notWord
        | DASH notWord
        | UNKNOWN notWord
        |
        ;

notEvent : LINE_END notEvent
         | SCORE notEvent
         | NUMBER notEvent
         | WORD notEvent
         | SLASH notEvent
         | DASH notEvent
         | UNKNOWN notEvent
         |
         ;

/* SCORES */
scores : score LINE_END scores
       | score
       ;

score : SCORE LINE_END NUMBER
        {
            REQUIRE_SLOT(score_on);
            medal *cur = &medals[score_on];
            cur->score = $1;
            cur->rank = atoi($3) - 1;
            /* A rank lower than the previous entry's rank means the
               listing has moved on to the next division. */
            if (score_on == 0)
                cur->division = 0;
            else if (cur[-1].rank > cur->rank)
                cur->division = cur[-1].division + 1;
            else
                cur->division = cur[-1].division;
            score_on++;
        }
      ;

%%

/* Report a parser error on stderr. Always returns 0. */
int yyerror(const char *s)
{
    fprintf(stderr, "error: %s\n", s);
    return 0;
}

/* Return a newly allocated "left sep right"; exits if memory runs out. */
static char *joinStrings(const char *left, const char *sep, const char *right)
{
    size_t size = strlen(left) + strlen(sep) + strlen(right) + 1;
    char *joined = malloc(size);
    if (joined == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    snprintf(joined, size, "%s%s%s", left, sep, right);
    return joined;
}

/* names[index], or "Unknown" when index is outside 0..count-1. */
static const char *lookupName(char *const names[], int count, int index)
{
    return (index >= 0 && index < count) ? names[index] : "Unknown";
}

/* Substitute an empty string for a field that was never filled in. */
static const char *orEmpty(const char *text)
{
    return text ? text : "";
}

/*
 * Print one wikitable for an event.
 *
 * eventName  heading text; NULL prints "Unknown event"
 * data       medal entries, in sheet order
 * count      number of entries in data
 *
 * A header row is printed before the first entry and again each time the
 * division differs from that of the previous entry.
 */
void makeTable(char * eventName, medal* data,int count)
{
    printf("===%s===\n", eventName ? eventName : "Unknown event");
    printf("{| class = \"wikitable\" width=\"550\"\n");
    for (int i = 0; i < count; i++) {
        const medal *entry = &data[i];
        if (i == 0 || entry->division != data[i-1].division) {
            fputs(newRow, stdout);
            printf("! %s !! Name !! School !! Score\n",
                   lookupName(divisionNames, 3, entry->division));
        }
        fputs(newRow, stdout);
        const char *rank = lookupName(rankNames, 3, entry->rank);
        printf("| {{%s|%s}} || %s %s || %s || %s \n",
               rank, rank,
               orEmpty(entry->firstName), orEmpty(entry->lastName),
               orEmpty(entry->school), orEmpty(entry->score));
    }
    printf("|}\n");
}

/*
 * Parse one sheet from `in` and print its table.
 * Parser state is cleared first so nothing carries over from an earlier
 * sheet. The table is printed even when the parse fails, holding whatever
 * was collected before the error. Returns 0 on success, 1 on a parse failure.
 */
static int convertSheet(FILE *in)
{
    num_medals = school_on = score_on = 0;
    eventTitle = NULL;
    memset(medals, 0, sizeof medals);

    yyrestart(in);
    int failed = (yyparse() != 0);
    makeTable(eventTitle, medals, num_medals);
    return failed;
}

/*
 * With no arguments, convert standard input; otherwise convert each named
 * file in turn. Exit status is non-zero if a file could not be opened or any
 * sheet failed to parse.
 */
int main(int argc, char **argv)
{
    if (argc < 2)
        return convertSheet(stdin);

    int status = 0;
    for (int i = 1; i < argc; i++) {
        FILE *f = fopen(argv[i], "r");
        if (!f) {
            perror(argv[i]);
            return 1;
        }
        if (convertSheet(f) != 0)
            status = 1;
        fclose(f);
    }
    return status;
}
Rovolo 15:25, 17 October 2010 (PDT)Rovolo
- A+ for effort. gil 03:24, 18 October 2010 (PDT)
- Several other state organizations use exactly the same score format. This will be beyond useful. Hats off to you. Madcap 10:36, 18 October 2010 (PDT)Madcap
- This is over my head and I'm going to need to figure it out, but if this makes transferring scores over easier, I'm all for it.