# Zipf's-law analysis of the Voynich manuscript: count how often each word
# occurs in a transcription, rank the words by frequency, and plot count
# against rank on log-log axes with a linear fit.

# ggplot2 draws the plot; gsubfn is used for the regex replacements.
library(ggplot2)
library(gsubfn)

######
# Read in voynich_trans.txt.
# voynich_trans.txt is from http://www.voynich.com/interlinear.html
# (the option labels on that page are in Japanese), using these options:
#   1: delete comments
#   2: "Prescott Currier"
#   3: "All Pages"
#   4: delete loci markers
#   5: delete inline comments
######
# With the default separator scan() splits on whitespace, so every element
# of `voynich` is a single token.
voynich <- scan("voynich_trans.txt", character(0))

# Take out the non-word characters as defined by EVA: % ! - = ,
# The hyphen must be last in the class to be a literal. Written as
# "[%!-=,]" it forms the range '!' through '=', which additionally deletes
# digits, '*', '.', '(', ')', '+' and other punctuation.
voynich <- gsubfn("[%!=,-]", "", voynich)

# Normalise any run of whitespace inside a token to a single space. The
# class "[\n(\\s+)]" treated '(', '+' and ')' as literal members, so the
# pattern is spelled as a POSIX class with the quantifier outside it.
voynich <- gsubfn("[[:space:]]+", " ", voynich)

# Tokens that consisted only of stripped characters are now empty strings;
# drop them so "" is not counted as a word.
voynich <- voynich[nzchar(voynich)]

# Word frequencies, most frequent first.
voytable <- sort(table(voynich), decreasing = TRUE)

# Ranks, 1-indexed; seq_along() also behaves for an empty table.
ranks <- seq_along(voytable)

# Finally, plot the two on log-log axes (the plot can then be saved as PDF).
# qplot() is deprecated in current ggplot2, so the plot is built explicitly:
# points, log10 scales on both axes, and a linear-model smoother.
zipf <- data.frame(count = as.vector(voytable), rank = ranks)
ggplot(zipf, aes(x = count, y = rank)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  stat_smooth(method = lm, formula = y ~ x) +
  labs(
    title = "Zipf's Law Analysis of\nthe Voynich Manuscript",
    x = "Count",
    y = "Rank"
  )
Residuals:
Min 1Q Median 3Q Max
-1.21935 -0.22343 -0.00705 0.26166 0.53975
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.953148 0.005146 1545.4 <2e-16 ***
log(voytable) -1.087437 0.005342 -203.6 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3231 on 4878 degrees of freedom
Multiple R-squared: 0.8947, Adjusted R-squared: 0.8947
F-statistic: 4.144e+04 on 1 and 4878 DF, p-value: < 2.2e-16
Residuals:
Min 1Q Median 3Q Max
-0.78328 -0.20811 0.00623 0.27206 0.58929
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.28861 0.01207 521.0 <2e-16 ***
log(mdtable) -1.31075 0.01533 -85.5 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.33 on 924 degrees of freedom
Multiple R-squared: 0.8878, Adjusted R-squared: 0.8877
F-statistic: 7310 on 1 and 924 DF, p-value: < 2.2e-16
![[paper clip]](/courses/source/wiki_images/paper_clip_tilt.png)