From 622f351dbf8c95ba4f5f4a2dae083485c00061d9 Mon Sep 17 00:00:00 2001 From: taitus Date: Mon, 8 May 2023 09:43:30 +0200 Subject: [PATCH 1/2] Remove pdf metadata In order to remove metadata from PDF documents we will use the exiftool_vendored gem. The following line: Exiftool.new(attachment_path, "-overwrite_original -all:all=") overwrites the original file with another file without metadata. So far this is the best solution we have found to perform this metadata deletion. When using Exiftool an exception is thrown, so we added a rescue to handle it. This problem is discussed in issue 28 of the https://github.com/exiftool-rb/exiftool.rb/ repository. We'll wait to see if this will be fixed in future versions. --- Gemfile | 1 + Gemfile.lock | 5 +++++ app/models/document.rb | 11 +++++++++++ 3 files changed, 17 insertions(+) diff --git a/Gemfile b/Gemfile index b2a962331..02fc71637 100644 --- a/Gemfile +++ b/Gemfile @@ -20,6 +20,7 @@ gem "dalli", "~> 3.2.6" gem "delayed_job_active_record", "~> 4.1.7" gem "devise", "~> 4.9.2" gem "devise-security", "~> 0.18.0" +gem "exiftool_vendored", "~> 12.60.0" gem "file_validators", "~> 3.0.0" gem "font-awesome-sass", "~> 5.15.1" # Remember to update vendor/assets/images/fontawesome when updating this gem gem "foundation-rails", "~> 6.6.2.0" diff --git a/Gemfile.lock b/Gemfile.lock index efdc73f7b..f8d8a642e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -209,6 +209,10 @@ GEM multi_json (>= 1.3) rake execjs (2.8.1) + exiftool (1.2.4) + json + exiftool_vendored (12.60.0) + exiftool (>= 0.7.0) factory_bot (6.2.0) activesupport (>= 5.0.0) factory_bot_rails (6.2.0) @@ -705,6 +709,7 @@ DEPENDENCIES devise-security (~> 0.18.0) email_spec (~> 2.2.2) erb_lint (~> 0.5.0) + exiftool_vendored (~> 12.60.0) factory_bot_rails (~> 6.2.0) faker (~> 3.2.1) file_validators (~> 3.0.0) diff --git a/app/models/document.rb b/app/models/document.rb index 9a48ed615..869f57e10 100644 --- a/app/models/document.rb +++ 
b/app/models/document.rb @@ -9,6 +9,8 @@ class Document < ApplicationRecord validates :documentable_id, presence: true, if: -> { persisted? } validates :documentable_type, presence: true, if: -> { persisted? } + before_save :remove_metadata + scope :admin, -> { where(admin: true) } def self.humanized_accepted_content_types @@ -36,4 +38,13 @@ class Document < ApplicationRecord def documentable_class association_class end + + def remove_metadata + return unless attachment.attached? + + attachment_path = ActiveStorage::Blob.service.path_for(attachment.key) + Exiftool.new(attachment_path, "-all:all=") + rescue Exiftool::ExiftoolNotInstalled, Exiftool::NoSuchFile + nil + end end From 84b88c0ec34af238e362bdea1ed2042691c7d772 Mon Sep 17 00:00:00 2001 From: taitus Date: Wed, 12 Jul 2023 09:38:40 +0200 Subject: [PATCH 2/2] Allow testing remove metadata from PDF In order to test that we remove metadata from PDFs, we need to add the "pdf-reader" gem. With this gem we can check the info from the PDF and ensure that this info is removed. 
--- Gemfile | 1 + Gemfile.lock | 12 +++++++++ spec/fixtures/files/logo_with_metadata.pdf | Bin 0 -> 6453 bytes spec/system/documents_spec.rb | 27 +++++++++++++++++++++ 4 files changed, 40 insertions(+) create mode 100644 spec/fixtures/files/logo_with_metadata.pdf create mode 100644 spec/system/documents_spec.rb diff --git a/Gemfile b/Gemfile index 02fc71637..cfd4d3942 100644 --- a/Gemfile +++ b/Gemfile @@ -86,6 +86,7 @@ group :test do gem "capybara", "~> 3.39.2" gem "capybara-webmock", "~> 0.7.0" gem "email_spec", "~> 2.2.2" + gem "pdf-reader" gem "rspec-rails", "~> 5.1.2" gem "selenium-webdriver", "~> 4.13.1" gem "simplecov", "~> 0.22.0", require: false diff --git a/Gemfile.lock b/Gemfile.lock index f8d8a642e..337d7f88e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -6,6 +6,7 @@ GEM GEM remote: https://rubygems.org/ specs: + Ascii85 (1.1.0) actioncable (6.1.7.6) actionpack (= 6.1.7.6) activesupport (= 6.1.7.6) @@ -70,6 +71,7 @@ GEM acts_as_votable (0.14.0) addressable (2.8.5) public_suffix (>= 2.0.2, < 6.0) + afm (0.2.2) ahoy_matey (4.2.1) activesupport (>= 5.2) device_detector @@ -259,6 +261,7 @@ GEM gyoku (1.4.0) builder (>= 2.1.2) rexml (~> 3.0) + hashery (2.1.2) hashie (5.0.0) highline (2.0.3) htmlentities (4.3.4) @@ -412,6 +415,12 @@ GEM parser (3.2.2.3) ast (~> 2.4.1) racc + pdf-reader (2.11.0) + Ascii85 (~> 1.0) + afm (~> 0.2.1) + hashery (~> 2.0) + ruby-rc4 + ttfunk pg (1.4.3) pg_search (2.3.6) activerecord (>= 5.2) @@ -549,6 +558,7 @@ GEM rubocop-capybara (~> 2.17) rubocop-factory_bot (~> 2.22) ruby-progressbar (1.13.0) + ruby-rc4 (0.1.5) ruby-vips (2.1.4) ffi (~> 1.12) ruby2_keywords (0.0.5) @@ -630,6 +640,7 @@ GEM tilt (2.0.10) timeout (0.4.1) tomlrb (2.0.3) + ttfunk (1.7.0) turbolinks (5.2.1) turbolinks-source (~> 5.2) turbolinks-source (5.2.0) @@ -739,6 +750,7 @@ DEPENDENCIES omniauth-rails_csrf_protection (~> 1.0.1) omniauth-twitter (~> 1.4.0) paranoia (~> 2.6.2) + pdf-reader pg (~> 1.4.3) pg_search (~> 2.3.6) pronto (~> 0.11.1) diff --git 
a/spec/fixtures/files/logo_with_metadata.pdf b/spec/fixtures/files/logo_with_metadata.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ec1a1c48929489a0b9336994e7dd6ebaef2ee321 GIT binary patch literal 6453 zcmb7J2UJr_w>~Mf&=Pu;&_$4hE=uoJx>AJzp?4x+0SVFt=^dnZ!2$}>Q4mlBk!}GZ zT|ht)l_D?kUcLVB-SysjbIv+zGP7sq`}W@1YoC1tjWx7RN{GwA1S{vaw_%b zYG(RoWYmoOQ6=V4eo+c!hv8!-3u6(fuX?zmN(_oPsFV31aICXOy=#qw7p5vXzqV*I z$FpiK^$xtBnGzrX45PFpEPw6%3zY==4_JTkvNUq^z&hbzD6@->xSvdQy@?EAsPpd5 zM0}^vFq96~-PH~Ev#aYvWa8&bB7(#PTqKIA`+E5XnE7Fxut*7szm!4}^1&fd#@GNS ztRL3LTzx1M^ZvNl2A}J$-{!=eUQ%7?ETu)0^3jl#Y zfDZ8jj@|(300ays-NXhZ-eijDmukk_JXYLk***rln(m)6ya6sHx#Ba0DY0 zGcz*{Ju4dv6B`2)GZSeO5QMk}N=8LSM#V%+P0RHEZby#+1SMb$7(+k^0E_@Z5TK(T zfP(~;)PUcOf}D&J3?a^O{v3z=IR_yFL&-rDN3Q`I2nYbfA#eZyeIfD$psdruGT&1H zQ1WQq+S8q$(XgF1N&v|9oyo$3q-WbvU`8I*skMvES3~!QJ6~Ub3gvw*pHnfE+tlxh zN$u2sMV#7cIq=J4+0|Zr6ll2rY76;OM2%;y^;(Q_t5~(3qOj9wVd2gnE2HI1Tk^ke z?O)w9o*!M?{^K+N_)APRG%L_P2MMs#o}Ov*H6S9TCsISg0T7hPfO!Ai$N&hG9L^#G z1|iXmM1GV++8{FU!5VXHMUfl-M1QpwPZ|43@$}pXu5XVllCrMeYSFq_+Wb?rwtDvCBGdV%iZ{a{JS_J`r`2i>`+6yE z#*LRS^KwWTPsx&9PrS+1)%=G4Wj*&K?e)TjdLf5Z1S7reFEMtK6)lkeUtXZ`HHZPnIa$eNgL>PYwyl9x*h+!1slmw-?KB)@7Hg{#$lD%VM1)Uf)|a3; zK8=rQK`vOg#CCZ;*I%M{oJkj>7Ad$uZc}KRxfSr%>~tGVleGFXrtecB<+u+csf&;4 zwk}6s&#V_wT-c#S9JfhlF1EXTjY-#+;$*K>ZP)Wa^_Z1d?UXkLV1pC;KM1vYw*-y5 zk^48@Q)lRG;$6?(OJ)D9xIAkv0N=A>Q{p3#$Yfd z8JO&se!(CJfPliKFoCJ}M;Le|R81K9$(WGnEj539G2w5&CRYaUyL^7SlH6RpQKMgC zch$x;kZ#0Vckqnn17Y+A%UJ%C#@|CK3Ii<~XOUOpQ&*boY^K)A!XEmM>Ym#@y~>ea z`atXap$vh~jIcc0$f(C7egv$NiEInLOyJkoxvWzYYpQVsIIsoXG!O7gRyA%;4IUdh z?S5jW&yL>&{cS5ZcwKjqFJ={n@W3l}aeRY2eq15ez-zONG7MSYN z#6q%oHow0IWvX(zG3bcU@|UR?roW8o!{IS_0(A3j*0bLWzBJs-)oZ}c;bQa6c1Hw9 zP%UEa^4cNy$`-lbKTS&yzQJ+UWajtK9J@0&11tQnM;B+axlghNADsB_1C!#)uc!h- zkSqY8G5$#WR?1%vP7Eu`yc^*^!i$6^U&wk*H@Fo2h$yOv32kucLRL^UQaKfViBk6C zeibovs>4xoVPLwvNsFIm*z_SdbsX9(S8#RLtN-BoOT)s3(flEWMuiW)bzcUnO=Gqt z*3#}ZU}ehd>bC3~$8i>Aq6!SB6j*Lu&KXS}KnbHm+fGAEE#j~8Dc&<0cIl^rBGvVV 
z1cI4#I>24_Z@)-tC_X#U8;y%e=|8BQGVHuJJKf+wzfwc#b#>I`MV_n&{YpIG9?U~@ zqMyMZAVnQ`s;)q)r+1UCbE>{zT&-9pNcz6zQow){h!_AvGgb2d@k@-sP&HLL&sCI zN1A+BIey%6w5~G4yMt268H~JqX=3!*!`BS_(x5#iooOieN9IrYB2(Tg2nfAz#Kp!> zb)E7PqjcOB^aVG0D$b`!Js3cCcF9e zgG;NOA%27>G1SE0In82?m*zVwkEZ`Ef&DWvKZ}%tp;Sq^wGSyDMQLDz+?}wd+G_u# zmj9M=OUg<}{+V2cfC@5$&CRA+Vusu0+Fvs8Aq96hE>j_2ozY>2n#Adb8-t@Gg@QE5 zw3rYg=bX*EK-w{UPMS>A*CQM!sXoDtpLUyQXfD1>@TA(TlleT8d)Q|+JH6aAIqkX` zI@tmkA4EVO<19hp(#4icAJ@8V85p1UdmoSatf7}(@(B8DCqqOsT0)Ygn5yJ9 z-+a#=h@e$;AFK?h_cGOz)9z3!uC4UEb{$5K`f~5D$&hb*w;4X1g!0 z^t`<})^1j8k(E9K#)r4DCvtH}BQSB_iu7GtOcSA>p1<>eCuChqDZM2C5K9K>9Gx?v7 zMSERH?RiflOnqa0-764O=qWC`BDY@{;rpyJTK;R!^N;)I&Q!1}vFxs~gOaOo1zcN#n)QU@-?;u5c+o$P5yQ4A+4}G9ltR zV0RVheO09?0H!K21q!T&LB=}CCF-R3FF?p+iZR)2EKk|zW2_Pg zj^{8Fj(X_vY^UsV2CPf6E^sOQ_lR=G@p~1P!nc)!sALg&i6?u`IV3sQ+|9ZNnZm4} z;iBv7Vt>!&7uL*XE9oOs4{f5KOlS>XdqPVP+74+uF~hVOLEjy?<#?6119pw;+OgRr zo~NC{$yC-*&j@alecxp#9)wM~~x=xiF3 zZnOZasenjMBi>xsgU?P#QK+3aZQ!)Cb^x1|$_1l(fkLFcAbqN`ar$u~CL=vAO||K> z@_LU{E~#8nFE^#F@)@(N$!;mIHfq-h)eA*wr%7df&YB;nORG)s%vj61B50I8nH8L6 zFJ35Y&wEYpN5w%i*rZ#f+YDcN_gt|Sq4BX|l&)rONrq#rW3FSIytZ>yO_Hy-Ph=5P5l6By=cAs-~u`$=i{Kci(tbI`Qp0zjmncF zN@eeA73-X;8Z2E)&o&G(+NFxy$f-8jHkk#h8R?@eQTBICHj9-^3hN5XI<-4(u088| ziG1yFLp86vsyM#56LSkwN^s(@NvRz7~5yqST5vEN>LBykoH!+YmE5 zxhBSAC1OQp>D~C@?%tEvJ-MECPN99#9`jBOLly6q9@JHzoDJ&S2tAW)clRAj>)F=f zUAkSqeW85`3dNX^n0C6aQ@Ehxt&ySQh_w|$^U!v}JcaNT;d`UHm9qs_Ln1@8L#5O- ztXr&OC)ol<{0Yrm0V(3n;tBR4e#K)JZ!+es*Y*YtCli;0dc1qyO({+vHt&nl>(i%b z-{?!POk3s{roIrHb?1D_`8!RiUmZ6+HY=4*E5#`#Dlv8hb<~EUzF`i*he3PqK2L5f z?@jI-K~F(%Q?0RR19Pnp-;L&-K`Nks8{v^wqq<{wFC8_CDxlE5nECbhu=$VTQnr~5>#qYIELJ}su1~h_w(>u5 zjFX3Z7Jd}R__%t}pnk{xQSc&kGOaacKXv)3=yKw=-8bw(3N$P7e&mQoR*zkrlWx5( zv+g|DNcF+o#ryW3@C()H$?2scQq?2XFWe&=zcr4k31Z^!irPsj3b&)4NSC73Tb{X( zf3#pMQ5i`wI38%_(wcqqhrt(}1c5xk>sHha9`|QlwXucIOa>84I}5oBKF_%aJ`e1N z$+*Y_Uv%vV`VoE5E%VWnt}be#-uBI-v%z-VErc%}Yf0WnZ=LC_=UUGbv7EC$@iSv4 zMJ9JU9NZcO^(^2PD_`}p@pf^SpTX01Zc 
ze}6Z)8@bV5zFWQ@JQqY6{Px{y%V*0RN2?3dFYD**>J|q-m3(SyJ-^;@4rAl*K<}R9 zoYW{;b-!Kjj)(Sm<%hBlc#fpprS=b3nl=hv_r00yob|po3QC0ZQYal`;%d9r|JB&D zPSv{?^OkV?$?#gqhn7y)YxdVPHgvXAhP9)#%ljTDT^v6%HYrynmoED_G4<)adH+7& z!?`c&R~TzojipK-eS@CjmEV5f_l;-s7LCI6rTEaARpGUSwC^8SWYxOX%iEm^EeLJjh(1i5IM#YA(|$e7ef7bncl)Ye z!(rxp-OUcN(B(b5ebmfuOX%XwqXJII4f4H@;vH9xF1%G-4V^h0p{S<6pgef_)S=s< z)B1dgFwE%0ts3RJ!|`vEF4gTj>qV0e!ws>nNzu*fsH2SUcZ{Bp4Ob6~YBg$o(5}-S zPWYbi?()5z-mT{eS)=n&ogzOTe$5!EEes3ZZ+CSZ=-KidS<6w@o2lxM-<#O=UUptt z7!7vctf-8isqIkS#UB>$JMHwPtgahEl zOc5F6Uy9)owt+T=CPu0Hbw?dpY}8R|_CCHr!7i?oWH9h4p6e8Ezha;Y4ruaVF(g0; zaG;6Y!btM2y;?^^UuhrD3v>_4gK$)rrO*q1`Iy}Jsz&5)1cAd7BDa!UrRLuF;_Iv_ zkCXdt(}RAI&myJy{*%lzidcvq1)Y`K7o$Edl&f8J3(v!qf>Q!CqswRH4yvyNbqi8) zHjK)-N`$??P-yL0yR-qJcB(V1d>5a^2-aO>@w~5M@hs;@M1nOfYO9M?K{&z23)d75{HxN>xWyycUV|IJh+Et9JBQj!wC zOXH-9K52|p@;7t8Oe|;1z)+^XzBnZ5or75G*Y$DnMgCxHmR#rkzT~bp*URFa=PC^O!e~Z}t`o3Ukigh8K;fxJI+MtPt6KBpNZ6t`N zOA@;p21q#~o?j*Ne|xWy{g0A)NWPPU!?&YClfgJVJ_TQh$5ZLxskGqTP`+$cC^f%2 zpb?RgLST>VecIp7Rb_Z^GC3LWfB*!0viyHhSs)G*fC~vAVw9DTCVnLm)HKq9{TBow B?1umV literal 0 HcmV?d00001 diff --git a/spec/system/documents_spec.rb b/spec/system/documents_spec.rb new file mode 100644 index 000000000..d9815be69 --- /dev/null +++ b/spec/system/documents_spec.rb @@ -0,0 +1,27 @@ +require "rails_helper" + +describe "Documents" do + describe "Metadata" do + scenario "download document without metadata" do + login_as(create(:user)) + visit new_proposal_path + + fill_in "Proposal title", with: "debate" + fill_in "Proposal summary", with: "In summary, what we want is..." 
+ fill_in "Full name of the person submitting the proposal", with: "Isabel Garcia" + documentable_attach_new_file(file_fixture("logo_with_metadata.pdf")) + check "I agree to the Privacy Policy and the Terms and conditions of use" + + click_button "Create proposal" + + io = URI.parse("#{app_host}#{polymorphic_path(Document.last.attachment)}").open + reader = PDF::Reader.new(io) + + expect(reader.info[:Keywords]).not_to eq "Test Metadata" + expect(reader.info[:Author]).not_to eq "Test Developer" + expect(reader.info[:Title]).not_to eq "logo_with_metadata.pdf" + expect(reader.info[:Producer]).not_to eq "Test Producer" + expect(reader.info).to eq({}) + end + end +end