[PATCH] lib/oe/gpg_sign.py: Avoid race when creating .sig files in detach_sign


Tobias Hagelborn
 

Move the signature file into place only after it is successfully signed.
This to avoid race and corrupted .sig files in cases multiple onging
builds write to a shared sstate-cache dir.

Signed-off-by: Tobias Hagelborn <tobiasha@...>
---
meta/lib/oe/gpg_sign.py | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/meta/lib/oe/gpg_sign.py b/meta/lib/oe/gpg_sign.py
index 613dab8561..868846cdc5 100644
--- a/meta/lib/oe/gpg_sign.py
+++ b/meta/lib/oe/gpg_sign.py
@@ -5,11 +5,12 @@
#

"""Helper module for GPG signing"""
-import os

import bb
-import subprocess
+import os
import shlex
+import subprocess
+import tempfile

class LocalSigner(object):
"""Class for handling local (on the build host) signing"""
@@ -73,8 +74,6 @@ class LocalSigner(object):
cmd += ['--homedir', self.gpg_path]
if armor:
cmd += ['--armor']
- if output_suffix:
- cmd += ['-o', input_file + "." + output_suffix]
if use_sha256:
cmd += ['--digest-algo', "SHA256"]

@@ -83,19 +82,25 @@ class LocalSigner(object):
if self.gpg_version > (2,1,):
cmd += ['--pinentry-mode', 'loopback']

- cmd += [input_file]
-
try:
if passphrase_file:
with open(passphrase_file) as fobj:
passphrase = fobj.readline();

- job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- (_, stderr) = job.communicate(passphrase.encode("utf-8"))
+ output_file = input_file + "." + (output_suffix or 'sig')
+ with tempfile.TemporaryDirectory(dir=os.path.dirname(output_file)) as tmp_dir:
+ tmp_file = os.path.join(tmp_dir, os.path.basename(output_file))
+ cmd += ['-o', tmp_file]
+
+ cmd += [input_file]
+
+ job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+ (_, stderr) = job.communicate(passphrase.encode("utf-8"))

- if job.returncode:
- bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
+ if job.returncode:
+ bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))

+ os.rename(tmp_file, output_file)
except IOError as e:
bb.error("IO error (%s): %s" % (e.errno, e.strerror))
raise Exception("Failed to sign '%s'" % input_file)
--
2.30.2


Tobias Hagelborn
 

Some context to this:

We run a shared state-cache and dockerized builds on large servers. Running ~30 builds and ~200 parallel tasks using shared signed-sstate gives this type of error messages (stemming from corruption of the .sig files created):

ERROR: packagegroup-<PACKAGE>-1.0-r0 do_populate_lic: GPG exited with code 2: gpg: signing failed: Bad passphrase

The provided solution with renaming the files into final destination has proven to eliminate the issue.

Cheers
Tobias






From: openembedded-core@... <openembedded-core@...> on behalf of Tobias Hagelborn <Tobias.Hagelborn@...>
Sent: Thursday, March 23, 2023 11:08 AM
To: openembedded-core@... <openembedded-core@...>
Subject: [OE-core] [PATCH] lib/oe/gpg_sign.py: Avoid race when creating .sig files in detach_sign
 
Move the signature file into place only after it is successfully signed.
This to avoid race and corrupted .sig files in cases multiple onging
builds write to a shared sstate-cache dir.

Signed-off-by: Tobias Hagelborn <tobiasha@...>
---
 meta/lib/oe/gpg_sign.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/meta/lib/oe/gpg_sign.py b/meta/lib/oe/gpg_sign.py
index 613dab8561..868846cdc5 100644
--- a/meta/lib/oe/gpg_sign.py
+++ b/meta/lib/oe/gpg_sign.py
@@ -5,11 +5,12 @@
 #
 
 """Helper module for GPG signing"""
-import os
 
 import bb
-import subprocess
+import os
 import shlex
+import subprocess
+import tempfile
 
 class LocalSigner(object):
     """Class for handling local (on the build host) signing"""
@@ -73,8 +74,6 @@ class LocalSigner(object):
             cmd += ['--homedir', self.gpg_path]
         if armor:
             cmd += ['--armor']
-        if output_suffix:
-            cmd += ['-o', input_file + "." + output_suffix]
         if use_sha256:
             cmd += ['--digest-algo', "SHA256"]
 
@@ -83,19 +82,25 @@ class LocalSigner(object):
         if self.gpg_version > (2,1,):
             cmd += ['--pinentry-mode', 'loopback']
 
-        cmd += [input_file]
-
         try:
             if passphrase_file:
                 with open(passphrase_file) as fobj:
                     passphrase = fobj.readline();
 
-            job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
-            (_, stderr) = job.communicate(passphrase.encode("utf-8"))
+            output_file = input_file + "." + (output_suffix or 'sig')
+            with tempfile.TemporaryDirectory(dir=os.path.dirname(output_file)) as tmp_dir:
+                tmp_file = os.path.join(tmp_dir, os.path.basename(output_file))
+                cmd += ['-o', tmp_file]
+
+                cmd += [input_file]
+
+                job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+                (_, stderr) = job.communicate(passphrase.encode("utf-8"))
 
-            if job.returncode:
-                bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
+                if job.returncode:
+                    bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
 
+                os.rename(tmp_file, output_file)
         except IOError as e:
             bb.error("IO error (%s): %s" % (e.errno, e.strerror))
             raise Exception("Failed to sign '%s'" % input_file)
--
2.30.2


Richard Purdie
 

On Thu, 2023-03-23 at 11:08 +0100, Tobias Hagelborn wrote:
Move the signature file into place only after it is successfully signed.
This to avoid race and corrupted .sig files in cases multiple onging
builds write to a shared sstate-cache dir.

Signed-off-by: Tobias Hagelborn <tobiasha@...>
---
meta/lib/oe/gpg_sign.py | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/meta/lib/oe/gpg_sign.py b/meta/lib/oe/gpg_sign.py
index 613dab8561..868846cdc5 100644
--- a/meta/lib/oe/gpg_sign.py
+++ b/meta/lib/oe/gpg_sign.py
@@ -5,11 +5,12 @@
#

"""Helper module for GPG signing"""
-import os

import bb
-import subprocess
+import os
import shlex
+import subprocess
+import tempfile

class LocalSigner(object):
"""Class for handling local (on the build host) signing"""
@@ -73,8 +74,6 @@ class LocalSigner(object):
cmd += ['--homedir', self.gpg_path]
if armor:
cmd += ['--armor']
- if output_suffix:
- cmd += ['-o', input_file + "." + output_suffix]
if use_sha256:
cmd += ['--digest-algo', "SHA256"]

@@ -83,19 +82,25 @@ class LocalSigner(object):
if self.gpg_version > (2,1,):
cmd += ['--pinentry-mode', 'loopback']

- cmd += [input_file]
-
try:
if passphrase_file:
with open(passphrase_file) as fobj:
passphrase = fobj.readline();

- job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- (_, stderr) = job.communicate(passphrase.encode("utf-8"))
+ output_file = input_file + "." + (output_suffix or 'sig')
This doesn't match the behaviour of output_suffix as used above where
it defaults to None. This forces it to a default of "sig" instead?

If that intentional that should be in the commit message.


+ with tempfile.TemporaryDirectory(dir=os.path.dirname(output_file)) as tmp_dir:
+ tmp_file = os.path.join(tmp_dir, os.path.basename(output_file))
+ cmd += ['-o', tmp_file]
+
+ cmd += [input_file]
+
+ job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+ (_, stderr) = job.communicate(passphrase.encode("utf-8"))

- if job.returncode:
- bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
+ if job.returncode:
+ bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))

+ os.rename(tmp_file, output_file)
except IOError as e:
bb.error("IO error (%s): %s" % (e.errno, e.strerror))
raise Exception("Failed to sign '%s'" % input_file)
Cheers,

Richard


Richard Purdie
 

On Wed, 2023-03-29 at 23:33 +0100, Richard Purdie via
lists.openembedded.org wrote:
On Thu, 2023-03-23 at 11:08 +0100, Tobias Hagelborn wrote:
Move the signature file into place only after it is successfully signed.
This to avoid race and corrupted .sig files in cases multiple onging
builds write to a shared sstate-cache dir.

Signed-off-by: Tobias Hagelborn <tobiasha@...>
---
meta/lib/oe/gpg_sign.py | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/meta/lib/oe/gpg_sign.py b/meta/lib/oe/gpg_sign.py
index 613dab8561..868846cdc5 100644
--- a/meta/lib/oe/gpg_sign.py
+++ b/meta/lib/oe/gpg_sign.py
@@ -5,11 +5,12 @@
#

"""Helper module for GPG signing"""
-import os

import bb
-import subprocess
+import os
import shlex
+import subprocess
+import tempfile

class LocalSigner(object):
"""Class for handling local (on the build host) signing"""
@@ -73,8 +74,6 @@ class LocalSigner(object):
cmd += ['--homedir', self.gpg_path]
if armor:
cmd += ['--armor']
- if output_suffix:
- cmd += ['-o', input_file + "." + output_suffix]
if use_sha256:
cmd += ['--digest-algo', "SHA256"]

@@ -83,19 +82,25 @@ class LocalSigner(object):
if self.gpg_version > (2,1,):
cmd += ['--pinentry-mode', 'loopback']

- cmd += [input_file]
-
try:
if passphrase_file:
with open(passphrase_file) as fobj:
passphrase = fobj.readline();

- job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- (_, stderr) = job.communicate(passphrase.encode("utf-8"))
+ output_file = input_file + "." + (output_suffix or 'sig')
This doesn't match the behaviour of output_suffix as used above where
it defaults to None. This forces it to a default of "sig" instead?

If that intentional that should be in the commit message.


+ with tempfile.TemporaryDirectory(dir=os.path.dirname(output_file)) as tmp_dir:
+ tmp_file = os.path.join(tmp_dir, os.path.basename(output_file))
+ cmd += ['-o', tmp_file]
+
+ cmd += [input_file]
+
+ job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+ (_, stderr) = job.communicate(passphrase.encode("utf-8"))

- if job.returncode:
- bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
+ if job.returncode:
+ bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))

+ os.rename(tmp_file, output_file)
except IOError as e:
bb.error("IO error (%s): %s" % (e.errno, e.strerror))
raise Exception("Failed to sign '%s'" % input_file)
I've been struggling to confirm it but have now done so. This does
cause an oe-selftest failure as here:

https://autobuilder.yoctoproject.org/typhoon/#/builders/80/builds/4950/steps/14/logs/stdio


i.e. from

oe-selftest -r runtime_test.TestImage.test_testimage_dnf

Cheers,

Richard


Richard Purdie
 

On Thu, 2023-03-30 at 10:10 +0100, Richard Purdie via
lists.openembedded.org wrote:
On Wed, 2023-03-29 at 23:33 +0100, Richard Purdie via
lists.openembedded.org wrote:
On Thu, 2023-03-23 at 11:08 +0100, Tobias Hagelborn wrote:
Move the signature file into place only after it is successfully signed.
This to avoid race and corrupted .sig files in cases multiple onging
builds write to a shared sstate-cache dir.

Signed-off-by: Tobias Hagelborn <tobiasha@...>
---
meta/lib/oe/gpg_sign.py | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/meta/lib/oe/gpg_sign.py b/meta/lib/oe/gpg_sign.py
index 613dab8561..868846cdc5 100644
--- a/meta/lib/oe/gpg_sign.py
+++ b/meta/lib/oe/gpg_sign.py
@@ -5,11 +5,12 @@
#

"""Helper module for GPG signing"""
-import os

import bb
-import subprocess
+import os
import shlex
+import subprocess
+import tempfile

class LocalSigner(object):
"""Class for handling local (on the build host) signing"""
@@ -73,8 +74,6 @@ class LocalSigner(object):
cmd += ['--homedir', self.gpg_path]
if armor:
cmd += ['--armor']
- if output_suffix:
- cmd += ['-o', input_file + "." + output_suffix]
if use_sha256:
cmd += ['--digest-algo', "SHA256"]

@@ -83,19 +82,25 @@ class LocalSigner(object):
if self.gpg_version > (2,1,):
cmd += ['--pinentry-mode', 'loopback']

- cmd += [input_file]
-
try:
if passphrase_file:
with open(passphrase_file) as fobj:
passphrase = fobj.readline();

- job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- (_, stderr) = job.communicate(passphrase.encode("utf-8"))
+ output_file = input_file + "." + (output_suffix or 'sig')
This doesn't match the behaviour of output_suffix as used above where
it defaults to None. This forces it to a default of "sig" instead?

If that intentional that should be in the commit message.


+ with tempfile.TemporaryDirectory(dir=os.path.dirname(output_file)) as tmp_dir:
+ tmp_file = os.path.join(tmp_dir, os.path.basename(output_file))
+ cmd += ['-o', tmp_file]
+
+ cmd += [input_file]
+
+ job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+ (_, stderr) = job.communicate(passphrase.encode("utf-8"))

- if job.returncode:
- bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
+ if job.returncode:
+ bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))

+ os.rename(tmp_file, output_file)
except IOError as e:
bb.error("IO error (%s): %s" % (e.errno, e.strerror))
raise Exception("Failed to sign '%s'" % input_file)
I've been struggling to confirm it but have now done so. This does
cause an oe-selftest failure as here:

https://autobuilder.yoctoproject.org/typhoon/#/builders/80/builds/4950/steps/14/logs/stdio


i.e. from

oe-selftest -r runtime_test.TestImage.test_testimage_dnf
I should also mention, this test fails with oe-core's nodistro for
other reasons but does work ok with poky. I filed a bug for that:

https://bugzilla.yoctoproject.org/show_bug.cgi?id=15086

since it was confusing my debugging! You'd therefore need to use poky
for testing your change until we fix that.

Cheers,

Richard


Tobias Hagelborn
 

Thanks for the test feedback Richard!

I have not run the oe selftest properly on this one and will look in to the self-test results.
(We have not used the signed rpm part for instance)

Cheers
Tobias


From: Richard Purdie <richard.purdie@...>
Sent: Thursday, March 30, 2023 11:10 AM
To: Tobias Hagelborn <Tobias.Hagelborn@...>; openembedded-core@... <openembedded-core@...>
Subject: Re: [OE-core] [PATCH] lib/oe/gpg_sign.py: Avoid race when creating .sig files in detach_sign
 
On Wed, 2023-03-29 at 23:33 +0100, Richard Purdie via
lists.openembedded.org wrote:
> On Thu, 2023-03-23 at 11:08 +0100, Tobias Hagelborn wrote:
> > Move the signature file into place only after it is successfully signed.
> > This to avoid race and corrupted .sig files in cases multiple onging
> > builds write to a shared sstate-cache dir.
> >
> > Signed-off-by: Tobias Hagelborn <tobiasha@...>
> > ---
> >  meta/lib/oe/gpg_sign.py | 25 +++++++++++++++----------
> >  1 file changed, 15 insertions(+), 10 deletions(-)
> >
> > diff --git a/meta/lib/oe/gpg_sign.py b/meta/lib/oe/gpg_sign.py
> > index 613dab8561..868846cdc5 100644
> > --- a/meta/lib/oe/gpg_sign.py
> > +++ b/meta/lib/oe/gpg_sign.py
> > @@ -5,11 +5,12 @@
> >  #
> > 
> >  """Helper module for GPG signing"""
> > -import os
> > 
> >  import bb
> > -import subprocess
> > +import os
> >  import shlex
> > +import subprocess
> > +import tempfile
> > 
> >  class LocalSigner(object):
> >      """Class for handling local (on the build host) signing"""
> > @@ -73,8 +74,6 @@ class LocalSigner(object):
> >              cmd += ['--homedir', self.gpg_path]
> >          if armor:
> >              cmd += ['--armor']
> > -        if output_suffix:
> > -            cmd += ['-o', input_file + "." + output_suffix]
> >          if use_sha256:
> >              cmd += ['--digest-algo', "SHA256"]
> > 
> > @@ -83,19 +82,25 @@ class LocalSigner(object):
> >          if self.gpg_version > (2,1,):
> >              cmd += ['--pinentry-mode', 'loopback']
> > 
> > -        cmd += [input_file]
> > -
> >          try:
> >              if passphrase_file:
> >                  with open(passphrase_file) as fobj:
> >                      passphrase = fobj.readline();
> > 
> > -            job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
> > -            (_, stderr) = job.communicate(passphrase.encode("utf-8"))
> > +            output_file = input_file + "." + (output_suffix or 'sig')
>
> This doesn't match the behaviour of output_suffix as used above where
> it defaults to None. This forces it to a default of "sig" instead?
>
> If that intentional that should be in the commit message.
>
>
> > +            with tempfile.TemporaryDirectory(dir=os.path.dirname(output_file)) as tmp_dir:
> > +                tmp_file = os.path.join(tmp_dir, os.path.basename(output_file))
> > +                cmd += ['-o', tmp_file]
> > +
> > +                cmd += [input_file]
> > +
> > +                job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
> > +                (_, stderr) = job.communicate(passphrase.encode("utf-8"))
> > 
> > -            if job.returncode:
> > -                bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
> > +                if job.returncode:
> > +                    bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
> > 
> > +                os.rename(tmp_file, output_file)
> >          except IOError as e:
> >              bb.error("IO error (%s): %s" % (e.errno, e.strerror))
> >              raise Exception("Failed to sign '%s'" % input_file)

I've been struggling to confirm it but have now done so. This does
cause an oe-selftest failure as here:

https://autobuilder.yoctoproject.org/typhoon/#/builders/80/builds/4950/steps/14/logs/stdio


i.e. from

oe-selftest -r runtime_test.TestImage.test_testimage_dnf

Cheers,

Richard



Alexandre Belloni
 

On 30/03/2023 10:10:21+0100, Richard Purdie wrote:
On Wed, 2023-03-29 at 23:33 +0100, Richard Purdie via
lists.openembedded.org wrote:
On Thu, 2023-03-23 at 11:08 +0100, Tobias Hagelborn wrote:
Move the signature file into place only after it is successfully signed.
This to avoid race and corrupted .sig files in cases multiple onging
builds write to a shared sstate-cache dir.

Signed-off-by: Tobias Hagelborn <tobiasha@...>
---
meta/lib/oe/gpg_sign.py | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/meta/lib/oe/gpg_sign.py b/meta/lib/oe/gpg_sign.py
index 613dab8561..868846cdc5 100644
--- a/meta/lib/oe/gpg_sign.py
+++ b/meta/lib/oe/gpg_sign.py
@@ -5,11 +5,12 @@
#

"""Helper module for GPG signing"""
-import os

import bb
-import subprocess
+import os
import shlex
+import subprocess
+import tempfile

class LocalSigner(object):
"""Class for handling local (on the build host) signing"""
@@ -73,8 +74,6 @@ class LocalSigner(object):
cmd += ['--homedir', self.gpg_path]
if armor:
cmd += ['--armor']
- if output_suffix:
- cmd += ['-o', input_file + "." + output_suffix]
if use_sha256:
cmd += ['--digest-algo', "SHA256"]

@@ -83,19 +82,25 @@ class LocalSigner(object):
if self.gpg_version > (2,1,):
cmd += ['--pinentry-mode', 'loopback']

- cmd += [input_file]
-
try:
if passphrase_file:
with open(passphrase_file) as fobj:
passphrase = fobj.readline();

- job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- (_, stderr) = job.communicate(passphrase.encode("utf-8"))
+ output_file = input_file + "." + (output_suffix or 'sig')
This doesn't match the behaviour of output_suffix as used above where
it defaults to None. This forces it to a default of "sig" instead?

If that intentional that should be in the commit message.


+ with tempfile.TemporaryDirectory(dir=os.path.dirname(output_file)) as tmp_dir:
+ tmp_file = os.path.join(tmp_dir, os.path.basename(output_file))
+ cmd += ['-o', tmp_file]
+
+ cmd += [input_file]
+
+ job = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+ (_, stderr) = job.communicate(passphrase.encode("utf-8"))

- if job.returncode:
- bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))
+ if job.returncode:
+ bb.fatal("GPG exited with code %d: %s" % (job.returncode, stderr.decode("utf-8")))

+ os.rename(tmp_file, output_file)
except IOError as e:
bb.error("IO error (%s): %s" % (e.errno, e.strerror))
raise Exception("Failed to sign '%s'" % input_file)
I've been struggling to confirm it but have now done so. This does
cause an oe-selftest failure as here:

https://autobuilder.yoctoproject.org/typhoon/#/builders/80/builds/4950/steps/14/logs/stdio


i.e. from

oe-selftest -r runtime_test.TestImage.test_testimage_dnf
I realize just now that I forgot to send an email telling exactly that,
sorry!

Cheers,

Richard




--
Alexandre Belloni, co-owner and COO, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com